diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index dceefb95a1..89e623e4fa 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -5,20 +5,26 @@
 ### New features
 - Add `logit_p` keyword to `pm.Bernoulli`, so that users can specify the logit of the success probability. This is faster and more stable than using `p=tt.nnet.sigmoid(logit_p)`.
-- Add `random` keyword to `pm.DensityDist` thus enabling users to pass custom random method which in turn makes sampling from a `DensityDist` possible.
+- Add `random` keyword to `pm.DensityDist`, thus enabling users to pass a custom random method, which in turn makes sampling from a `DensityDist` possible.
 - Effective sample size computation is updated. The estimation uses Geyer's initial positive sequence, which no longer truncates the autocorrelation series inaccurately. `pm.diagnostics.effective_n` can now report N_eff>N.
+- Added `KroneckerNormal` distribution and a corresponding `MarginalKron`
+  Gaussian Process implementation for efficient inference, along with
+  lower-level functions such as `cartesian` and `kronecker` products.
+- Added `Coregion` covariance function.
 
 ### Fixes
 
 - `VonMises` does not overflow for large values of kappa. i0 and i1 have been removed and we now use log_i0 to compute the logp.
 
+
 ### Deprecations
 
 - DIC and BPIC calculations have been removed
 - `njobs` and `nchains` kwarg are deprecated in favor of `cores` and `chains` for `sample`
 - `lag` kwarg in `pm.stats.autocorr` and `pm.stats.autocov` is deprecated.
 
+
 ## PyMC 3.3 (January 9, 2018)
 
 ### New features
diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index 8892592f18..932432ce2b 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -57,6 +57,7 @@
 from .multivariate import MvNormal
 from .multivariate import MatrixNormal
+from .multivariate import KroneckerNormal
 from .multivariate import MvStudentT
 from .multivariate import Dirichlet
 from .multivariate import Multinomial
@@ -123,6 +124,7 @@
            'TensorType',
            'MvNormal',
            'MatrixNormal',
+           'KroneckerNormal',
            'MvStudentT',
            'Dirichlet',
            'Multinomial',
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index 2250cf1287..e7010d60a1 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -13,6 +13,9 @@
 from .special import gammaln
 from pymc3.theanof import floatX
+from six.moves import xrange
+from functools import partial
+
 f = floatX
 c = - .5 * np.log(2. * np.pi)
@@ -326,3 +329,135 @@ def grad(self, inputs, grads):
         x_grad, = grads
         return [x_grad * self.grad_op(x)]
+
+
+# Custom Eigh, EighGrad, and eigh are required until
+# https://github.com/Theano/Theano/pull/6557 is handled, since lambdas
+# cannot be used with pickling.
+class Eigh(tt.nlinalg.Eig):
+    """
+    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
+
+    This is a copy of Eigh from theano that calls an EighGrad which uses
+    partial instead of lambda. Once this has been merged with theano this
+    should be removed.
+    """
+
+    _numop = staticmethod(np.linalg.eigh)
+    __props__ = ('UPLO',)
+
+    def __init__(self, UPLO='L'):
+        assert UPLO in ['L', 'U']
+        self.UPLO = UPLO
+
+    def make_node(self, x):
+        x = tt.as_tensor_variable(x)
+        assert x.ndim == 2
+        # NumPy's linalg.eigh may return either double or single
+        # precision eigenvalues depending on the installed version of
+        # LAPACK. Rather than trying to reproduce the (rather
+        # involved) logic, we just probe linalg.eigh with a trivial
+        # input.
+ w_dtype = self._numop([[np.dtype(x.dtype).type()]])[0].dtype.name + w = theano.tensor.vector(dtype=w_dtype) + v = theano.tensor.matrix(dtype=x.dtype) + return theano.gof.Apply(self, [x], [w, v]) + + def perform(self, node, inputs, outputs): + (x,) = inputs + (w, v) = outputs + w[0], v[0] = self._numop(x, self.UPLO) + + def grad(self, inputs, g_outputs): + r"""The gradient function should return + .. math:: \sum_n\left(W_n\frac{\partial\,w_n} + {\partial a_{ij}} + + \sum_k V_{nk}\frac{\partial\,v_{nk}} + {\partial a_{ij}}\right), + where [:math:`W`, :math:`V`] corresponds to ``g_outputs``, + :math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`. + Analytic formulae for eigensystem gradients are well-known in + perturbation theory: + .. math:: \frac{\partial\,w_n} + {\partial a_{ij}} = v_{in}\,v_{jn} + .. math:: \frac{\partial\,v_{kn}} + {\partial a_{ij}} = + \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m} + """ + x, = inputs + w, v = self(x) + # Replace gradients wrt disconnected variables with + # zeros. This is a work-around for issue #1063. + gw, gv = tt.nlinalg._zero_disconnected([w, v], g_outputs) + return [EighGrad(self.UPLO)(x, w, v, gw, gv)] + + +class EighGrad(theano.Op): + """ + Gradient of an eigensystem of a Hermitian matrix. + + This is a copy of EighGrad from theano that uses partial instead of lambda. + Once this has been merged with theano this should be removed. + """ + + __props__ = ('UPLO',) + + def __init__(self, UPLO='L'): + assert UPLO in ['L', 'U'] + self.UPLO = UPLO + if UPLO == 'L': + self.tri0 = np.tril + self.tri1 = partial(np.triu, k=1) + else: + self.tri0 = np.triu + self.tri1 = partial(np.tril, k=-1) + + def make_node(self, x, w, v, gw, gv): + x, w, v, gw, gv = map(tt.as_tensor_variable, (x, w, v, gw, gv)) + assert x.ndim == 2 + assert w.ndim == 1 + assert v.ndim == 2 + assert gw.ndim == 1 + assert gv.ndim == 2 + out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype, + gw.dtype, gv.dtype) + out = theano.tensor.matrix(dtype=out_dtype) + return theano.gof.Apply(self, [x, w, v, gw, gv], [out]) + + def perform(self, node, inputs, outputs): + """ + Implements the "reverse-mode" gradient for the eigensystem of + a square matrix. + """ + x, w, v, W, V = inputs + N = x.shape[0] + outer = np.outer + + def G(n): + return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m]) + for m in xrange(N) if m != n) + + g = sum(outer(v[:, n], v[:, n] * W[n] + G(n)) + for n in xrange(N)) + + # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a) + # (triu(a)) only. This means that partial derivative of + # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero + # for i < j (i > j). At the same time, non-zero components of + # the gradient must account for the fact that variation of the + # opposite triangle contributes to variation of two elements + # of Hermitian (symmetric) matrix. The following line + # implements the necessary logic. + out = self.tri0(g) + self.tri1(g).T + + # Make sure we return the right dtype even if NumPy performed + # upcasting in self.tri0. 
+        outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]
+
+
+def eigh(a, UPLO='L'):
+    """A copy, remove with Eigh and EighGrad when possible"""
+    return Eigh(UPLO)(a)
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 50a36d2166..3af91a8b26 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -20,12 +20,14 @@
 from ..model import Deterministic
 from .continuous import ChiSquared, Normal
 from .special import gammaln, multigammaln
-from .dist_math import bound, logpow, factln, Cholesky
+from .dist_math import bound, logpow, factln, Cholesky, eigh
+from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker
 
 __all__ = ['MvNormal', 'MvStudentT', 'Dirichlet',
            'Multinomial', 'Wishart', 'WishartBartlett',
-           'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal']
+           'LKJCorr', 'LKJCholeskyCov', 'MatrixNormal',
+           'KroneckerNormal']
 
 
 class _QuadFormBase(Continuous):
@@ -1293,3 +1295,220 @@ def logp(self, value):
         n = self.n
         norm = - 0.5 * m * n * pm.floatX(np.log(2 * np.pi))
         return norm - 0.5*trquaddist - m*half_collogdet - n*half_rowlogdet
+
+
+class KroneckerNormal(Continuous):
+    R"""
+    Multivariate normal log-likelihood with Kronecker-structured covariance.
+
+    .. math::
+
+       f(x \mid \mu, K) =
+           \frac{1}{(2\pi)^{N/2} |K|^{1/2}}
+           \exp\left\{ -\frac{1}{2} (x-\mu)^{\prime} K^{-1} (x-\mu) \right\}
+
+    ========  ==================================================
+    Support   :math:`x \in \mathbb{R}^N`
+    Mean      :math:`\mu`
+    Variance  :math:`K = \bigotimes K_i + \sigma^2 I_N`
+    ========  ==================================================
+
+    Parameters
+    ----------
+    mu : array
+        Vector of means, just as in `MvNormal`.
+    covs : list of arrays
+        The set of covariance matrices :math:`[K_1, K_2, ...]` to be
+        Kroneckered in the order provided :math:`\bigotimes K_i`.
+    chols : list of arrays
+        The set of lower Cholesky matrices :math:`[L_1, L_2, ...]` such that
+        :math:`K_i = L_i L_i'`.
+    evds : list of tuples
+        The set of eigenvalue-vector, eigenvector-matrix pairs
+        :math:`[(v_1, Q_1), (v_2, Q_2), ...]` such that
+        :math:`K_i = Q_i \text{diag}(v_i) Q_i'`. For example::
+
+            v_i, Q_i = tt.nlinalg.eigh(K_i)
+
+    sigma : scalar, Variable
+        Standard deviation of the Gaussian white noise.
+
+    Examples
+    --------
+    Define a multivariate normal variable with a covariance
+    :math:`K = K_1 \otimes K_2`
+
+    .. code:: python
+
+        K1 = np.array([[1., 0.5], [0.5, 2]])
+        K2 = np.array([[1., 0.4, 0.2], [0.4, 2, 0.3], [0.2, 0.3, 1]])
+        covs = [K1, K2]
+        N = 6
+        mu = np.zeros(N)
+        with pm.Model() as model:
+            vals = pm.KroneckerNormal('vals', mu=mu, covs=covs, shape=N)
+
+    Efficiency gains are made by Cholesky-decomposing :math:`K_1` and
+    :math:`K_2` individually rather than the larger :math:`K` matrix. Although
+    only two matrices :math:`K_1` and :math:`K_2` are shown here, an arbitrary
+    number of submatrices can be combined in this way. Cholesky factorizations
+    and eigendecompositions can be provided instead
+
+    .. code:: python
+
+        chols = [np.linalg.cholesky(Ki) for Ki in covs]
+        evds = [np.linalg.eigh(Ki) for Ki in covs]
+        with pm.Model() as model:
+            vals2 = pm.KroneckerNormal('vals2', mu=mu, chols=chols, shape=N)
+            # or
+            vals3 = pm.KroneckerNormal('vals3', mu=mu, evds=evds, shape=N)
+
+    neither of which will be converted. Diagonal noise can also be added to
+    the covariance matrix, :math:`K = K_1 \otimes K_2 + \sigma^2 I_N`.
+ Despite the noise removing the overall Kronecker structure of the matrix, + `KroneckerNormal` can continue to make efficient calculations by + utilizing eigendecompositons of the submatrices behind the scenes [1]. + Thus, + + .. code:: python + + sigma = 0.1 + with pm.Model() as noise_model: + vals = pm.KroneckerNormal('vals', mu=mu, covs=covs, sigma=sigma, shape=N) + vals2 = pm.KroneckerNormal('vals2', mu=mu, chols=chols, sigma=sigma, shape=N) + vals3 = pm.KroneckerNormal('vals3', mu=mu, evds=evds, sigma=sigma, shape=N) + + are identical, with `covs` and `chols` each converted to + eigendecompositions. + + References + ---------- + .. [1] Saatchi, Y. (2011). "Scalable inference for structured Gaussian process models" + """ + + def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, + *args, **kwargs): + self._setup(covs, chols, evds, sigma) + super(KroneckerNormal, self).__init__(*args, **kwargs) + self.mu = tt.as_tensor_variable(mu) + self.mean = self.median = self.mode = self.mu + + def _setup(self, covs, chols, evds, sigma): + self.cholesky = Cholesky(nofail=False, lower=True) + if len([i for i in [covs, chols, evds] if i is not None]) != 1: + raise ValueError('Incompatible parameterization. ' + 'Specify exactly one of covs, chols, ' + 'or evds.') + self._isEVD = False + self.sigma = sigma + self.is_noisy = self.sigma is not None and self.sigma != 0 + if covs is not None: + self._cov_type = 'cov' + self.covs = covs + if self.is_noisy: + # Noise requires eigendecomposition + eigh_map = map(eigh, covs) + self._setup_evd(eigh_map) + else: + # Otherwise use cholesky as usual + self.chols = list(map(self.cholesky, self.covs)) + self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) + self.sizes = tt.as_tensor_variable( + [chol.shape[0] for chol in self.chols]) + self.N = tt.prod(self.sizes) + elif chols is not None: + self._cov_type = 'chol' + if self.is_noisy: # A strange case... 
+ # Noise requires eigendecomposition + covs = [tt.dot(chol, chol.T) for chol in chols] + eigh_map = map(eigh, covs) + self._setup_evd(eigh_map) + else: + self.chols = chols + self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) + self.sizes = tt.as_tensor_variable( + [chol.shape[0] for chol in self.chols]) + self.N = tt.prod(self.sizes) + else: + self._cov_type = 'evd' + self._setup_evd(evds) + + def _setup_evd(self, eigh_iterable): + self._isEVD = True + eigs_sep, Qs = zip(*eigh_iterable) # Unzip + self.Qs = list(map(tt.as_tensor_variable, Qs)) + self.QTs = list(map(tt.transpose, self.Qs)) + + self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep)) + self.eigs = kron_diag(*self.eigs_sep) # Combine separate eigs + if self.is_noisy: + self.eigs += self.sigma**2 + self.N = self.eigs.shape[0] + + def _setup_random(self): + if not hasattr(self, 'mv_params'): + self.mv_params = {'mu': self.mu} + if self._cov_type == 'cov': + cov = kronecker(*self.covs) + if self.is_noisy: + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['cov'] = cov + elif self._cov_type == 'chol': + if self.is_noisy: + covs = [] + for eig, Q in zip(self.eigs_sep, self.Qs): + cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) + covs.append(cov_i) + cov = kronecker(*covs) + if self.is_noisy: + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['chol'] = self.cholesky(cov) + else: + self.mv_params['chol'] = kronecker(*self.chols) + elif self._cov_type == 'evd': + covs = [] + for eig, Q in zip(self.eigs_sep, self.Qs): + # print() + cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) + covs.append(cov_i) + cov = kronecker(*covs) + if self.is_noisy: + cov = cov + self.sigma**2 * tt.identity_like(cov) + self.mv_params['cov'] = cov + + def random(self, point=None, size=None): + # Expand params into terms MvNormal can understand to force consistency + self._setup_random() + dist = MvNormal.dist(**self.mv_params) + return dist.random(point, size) + + def _quaddist(self, value): + """Computes the quadratic (x-mu)^T @ K^-1 @ (x-mu) and log(det(K))""" + if value.ndim > 2 or value.ndim == 0: + raise ValueError('Invalid dimension for value: %s' % value.ndim) + if value.ndim == 1: + onedim = True + value = value[None, :] + else: + onedim = False + + delta = value - self.mu + if self._isEVD: + sqrt_quad = kron_dot(self.QTs, delta.T) + sqrt_quad = sqrt_quad/tt.sqrt(self.eigs[:, None]) + logdet = tt.sum(tt.log(self.eigs)) + else: + sqrt_quad = kron_solve_lower(self.chols, delta.T) + logdet = 0 + for chol_size, chol_diag in zip(self.sizes, self.chol_diags): + logchol = tt.log(chol_diag) * self.N/chol_size + logdet += tt.sum(2*logchol) + # Square each sample + quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T) + if onedim: + quad = quad[0] + return quad, logdet + + def logp(self, value): + quad, logdet = self._quaddist(value) + return - (quad + logdet + self.N*tt.log(2*np.pi)) / 2.0 diff --git a/pymc3/gp/__init__.py b/pymc3/gp/__init__.py index ca22f64977..d71db6b83e 100644 --- a/pymc3/gp/__init__.py +++ b/pymc3/gp/__init__.py @@ -1,4 +1,4 @@ from . import cov from . import mean from . 
import util
-from .gp import Latent, Marginal, MarginalSparse, TP
+from .gp import Latent, Marginal, MarginalSparse, TP, MarginalKron
diff --git a/pymc3/gp/cov.py b/pymc3/gp/cov.py
index 9628db91b2..ba903c0076 100644
--- a/pymc3/gp/cov.py
+++ b/pymc3/gp/cov.py
@@ -15,7 +15,8 @@
            'Cosine',
            'Periodic',
            'WarpedInput',
-           'Gibbs']
+           'Gibbs',
+           'Coregion']
 
 
 class Covariance(object):
@@ -143,6 +144,41 @@ def __call__(self, X, Xs=None, diag=False):
         return reduce(mul, self.merge_factors(X, Xs, diag))
 
 
+class Kron(Covariance):
+    R"""Form a covariance object that is the Kronecker product of other covariances.
+
+    In contrast to standard multiplication, where each covariance is given the
+    same inputs X and Xs, Kronecker product covariances first split the inputs
+    into their respective spaces (inferred from the input_dim of each object)
+    before forming their product. Kronecker covariances have a larger
+    input dimension than any of their factors, since the inputs are the
+    concatenated columns of their components.
+
+    Factors must be covariances or their combinations; plain arrays will not work.
+    """
+
+    def __init__(self, factor_list):
+        self.input_dims = [factor.input_dim for factor in factor_list]
+        input_dim = sum(self.input_dims)
+        super(Kron, self).__init__(input_dim=input_dim)
+        self.factor_list = factor_list
+
+    def _split(self, X, Xs):
+        indices = np.cumsum(self.input_dims)
+        X_split = np.hsplit(X, indices)
+        if Xs is not None:
+            Xs_split = np.hsplit(Xs, indices)
+        else:
+            Xs_split = [None] * len(X_split)
+        return X_split, Xs_split
+
+    def __call__(self, X, Xs=None, diag=False):
+        X_split, Xs_split = self._split(X, Xs)
+        covs = [cov(x, xs, diag) for cov, x, xs
+                in zip(self.factor_list, X_split, Xs_split)]
+        return reduce(mul, covs)
+
+
 class Constant(Covariance):
     R"""
     Constant valued covariance function.
@@ -515,3 +551,67 @@ def f(x, args):
                 args = (args,)
             return func(x, *args)
     return f
+
+
+class Coregion(Covariance):
+    R"""Covariance function for intrinsic/linear coregionalization models.
+    Adapted from GPy http://gpy.readthedocs.io/en/deploy/GPy.kern.src.html#GPy.kern.src.coregionalize.Coregionalize.
+
+    This covariance has the form:
+
+    .. math::
+
+       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(\kappa)
+
+    and calls must use integers associated with the index of the matrix.
+    This allows the API to remain consistent with other covariance objects:
+
+    .. math::
+
+       k(x, x') = \mathbf{B}[x, x'^\top]
+
+    Parameters
+    ----------
+    W : 2D array of shape (num_outputs, rank)
+        a low-rank matrix that determines the correlations between
+        the different outputs (rows)
+    kappa : 1D array of shape (num_outputs, )
+        a vector which allows the outputs to behave independently
+    B : 2D array of shape (num_outputs, num_outputs)
+        the total matrix; exactly one of (W, kappa) and B must be provided
+
+    Notes
+    -----
+    Exactly one dimension must be active for this kernel. Thus, if
+    `input_dim != 1`, then `active_dims` must have a length of one.
+ """ + + def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): + super(Coregion, self).__init__(input_dim, active_dims) + if len(self.active_dims) != 1: + raise ValueError('Coregion requires exactly one dimension to be active') + make_B = W is not None or kappa is not None + if make_B and B is not None: + raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion') + if make_B: + self.W = tt.as_tensor_variable(W) + self.kappa = tt.as_tensor_variable(kappa) + self.B = tt.dot(self.W, self.W.T) + tt.diag(self.kappa) + elif B is not None: + self.B = tt.as_tensor_variable(B) + else: + raise ValueError('Exactly one of (W, kappa) and B must be provided to Coregion') + + def full(self, X, Xs=None): + X, Xs = self._slice(X, Xs) + index = tt.cast(X, 'int32') + if Xs is None: + index2 = index.T + else: + index2 = tt.cast(Xs, 'int32').T + return self.B[index, index2] + + def diag(self, X): + X, _ = self._slice(X, None) + index = tt.cast(X, 'int32') + return tt.diag(self.B)[index.ravel()] diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py index 3a58491dbd..a692166ecc 100644 --- a/pymc3/gp/gp.py +++ b/pymc3/gp/gp.py @@ -9,8 +9,10 @@ from pymc3.gp.util import (conditioned_vars, infer_shape, stabilize, cholesky, solve_lower, solve_upper) from pymc3.distributions import draw_values +from pymc3.distributions.dist_math import eigh +from ..math import cartesian, kron_dot, kron_diag -__all__ = ['Latent', 'Marginal', 'TP', 'MarginalSparse'] +__all__ = ['Latent', 'Marginal', 'TP', 'MarginalSparse', 'MarginalKron'] class Base(object): @@ -787,3 +789,245 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): chol = cholesky(cov) shape = infer_shape(Xnew, kwargs.pop("shape", None)) return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + + +@conditioned_vars(["Xs", "y", "sigma"]) +class MarginalKron(Base): + R""" + Marginal Gaussian process whose covariance is a tensor product kernel. + + The `gp.MarginalKron` class is an implementation of the sum of a Kronecker + GP prior and additive white noise. It has `marginal_likelihood`, + `conditional` and `predict` methods. This GP implementation can be used to + efficiently implement regression on data that are normally distributed with + a tensor product kernel and are measured on a full grid of inputs: + `cartesian(*Xs)`. `MarginalKron` is based on the `KroneckerNormal` + distribution, see its docstring for more information. For more information + on the `prior` and `conditional` methods, see their docstrings. + + Parameters + ---------- + cov_funcs : list of Covariance objects + The covariance functions that compose the tensor (Kronecker) product. + Defaults to [zero]. + mean_func : None, instance of Mean + The mean function. Defaults to zero. + + Examples + -------- + .. code:: python + + # One dimensional column vectors of inputs + X1 = np.linspace(0, 1, 10)[:, None] + X2 = np.linspace(0, 2, 5)[:, None] + Xs = [X1, X2] + y = np.random.randn(len(X1)*len(X2)) # toy data + with pm.Model() as model: + # Specify the covariance functions for each Xi + cov_func1 = pm.gp.cov.ExpQuad(1, ls=0.1) # Must accept X1 without error + cov_func2 = pm.gp.cov.ExpQuad(1, ls=0.3) # Must accept X2 without error + + # Specify the GP. The default mean function is `Zero`. + gp = pm.gp.MarginalKron(cov_funcs=[cov_func1, cov_func2]) + + # Place a GP prior over the function f. + sigma = pm.HalfCauchy("sigma", beta=3) + y_ = gp.marginal_likelihood("y", Xs=Xs, y=y, sigma=sigma) + + # ... 
+ + # After fitting or sampling, specify the distribution + # at new points with .conditional + # Xnew need not be on a full grid + Xnew1 = np.linspace(-1, 2, 10)[:, None] + Xnew2 = np.linspace(0, 3, 10)[:, None] + Xnew = np.concatenate((Xnew1, Xnew2), axis=1) # Not full grid, works + Xnew = pm.math.cartesian(Xnew1, Xnew2) # Full grid, also works + + with model: + fcond = gp.conditional("fcond", Xnew=Xnew) + """ + + def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): + try: + self.cov_funcs = list(cov_funcs) + except TypeError: + self.cov_funcs = [cov_funcs] + cov_func = pm.gp.cov.Kron(self.cov_funcs) + super(MarginalKron, self).__init__(mean_func, cov_func) + + def __add__(self, other): + raise TypeError("Efficient implementation of additive, Kronecker-structured processes not implemented") + + def _build_marginal_likelihood(self, Xs): + self.X = cartesian(*Xs) + mu = self.mean_func(self.X) + covs = [f(X) for f, X in zip(self.cov_funcs, Xs)] + return mu, covs + + def _check_inputs(self, Xs, y, sigma): + N = np.prod([len(X) for X in Xs]) + if len(Xs) != len(self.cov_funcs): + raise ValueError('Must provide a covariance function for each X') + if N != len(y): + raise ValueError('Length of y must match cartesian product of Xs') + + def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): + """ + Returns the marginal likelihood distribution, given the input + locations `cartesian(*Xs)` and the data `y`. + + Parameters + ---------- + name : string + Name of the random variable + Xs : list of array-like + Function input values for each covariance function. Each entry + must be passable to its respective covariance without error. The + total covariance function is measured on the full grid + `cartesian(*Xs)`. + y : array-like + Data that is the sum of the function with the GP prior and Gaussian + noise. Must have shape `(n, )`. + sigma : scalar, Variable + Standard deviation of the white Gaussian noise. + is_observed : bool + Whether to set `y` as an `observed` variable in the `model`. + Default is `True`. + **kwargs + Extra keyword arguments that are passed to `KroneckerNormal` + distribution constructor. 
+ """ + self._check_inputs(Xs, y, sigma) + mu, covs = self._build_marginal_likelihood(Xs) + self.Xs = Xs + self.y = y + self.sigma = sigma + if is_observed: + return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, + observed=y, **kwargs) + else: + shape = np.prod([len(X) for X in Xs]) + return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, + shape=shape, **kwargs) + + def _build_conditional(self, Xnew, pred_noise, diag): + Xs, y, sigma = self.Xs, self.y, self.sigma + + # Old points + X = cartesian(*Xs) + delta = y - self.mean_func(X) + Kns = [f(x) for f, x in zip(self.cov_funcs, Xs)] + eigs_sep, Qs = zip(*map(eigh, Kns)) # Unzip + QTs = list(map(tt.transpose, Qs)) + eigs = kron_diag(*eigs_sep) # Combine separate eigs + if sigma is not None: + eigs += sigma**2 + + # New points + Km = self.cov_func(Xnew, diag=diag) + Knm = self.cov_func(X, Xnew) + Kmn = Knm.T + + # Build conditional mu + alpha = kron_dot(QTs, delta) + alpha = alpha/eigs[:, None] + alpha = kron_dot(Qs, alpha) + mu = tt.dot(Kmn, alpha).ravel() + self.mean_func(Xnew) + + # Build conditional cov + A = kron_dot(QTs, Knm) + A = A/tt.sqrt(eigs[:, None]) + if diag: + Asq = tt.sum(tt.square(A), 0) + cov = Km - Asq + if pred_noise: + cov += sigma + else: + Asq = tt.dot(A.T, A) + cov = Km - Asq + if pred_noise: + cov += sigma * np.eye(cov.shape) + return mu, cov + + def conditional(self, name, Xnew, pred_noise=False, **kwargs): + """ + Returns the conditional distribution evaluated over new input + locations `Xnew`, just as in `Marginal`. + + `Xnew` will be split + by columns and fed to the relevant covariance functions based on their + `input_dim`. For example, if `cov_func1`, `cov_func2`, and `cov_func3` + have `input_dim` of 2, 1, and 4, respectively, then `Xnew` must have + 7 columns and a covariance between the prediction points + + .. code:: python + + cov_func(Xnew) = cov_func1(Xnew[:, :2]) * cov_func1(Xnew[:, 2:3]) * cov_func1(Xnew[:, 3:]) + + This `cov_func` does not have a Kronecker structure without a full + grid, but the conditional distribution does not have a Kronecker + structure regardless. Thus, the conditional method must fall back to + using `MvNormal` rather than `KroneckerNormal` in either case. + + Parameters + ---------- + name : string + Name of the random variable + Xnew : array-like + Function input values. If one-dimensional, must be a column + vector with shape `(n, 1)`. + pred_noise : bool + Whether or not observation noise is included in the conditional. + Default is `False`. + **kwargs + Extra keyword arguments that are passed to `MvNormal` distribution + constructor. + """ + mu, cov = self._build_conditional(Xnew, pred_noise, False) + chol = cholesky(stabilize(cov)) + shape = infer_shape(Xnew, kwargs.pop("shape", None)) + return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs) + + def predict(self, Xnew, point=None, diag=False, pred_noise=False): + R""" + Return the mean vector and covariance matrix of the conditional + distribution as numpy arrays, given a `point`, such as the MAP + estimate or a sample from a `trace`. + + Parameters + ---------- + Xnew : array-like + Function input values. If one-dimensional, must be a column + vector with shape `(n, 1)`. + point : pymc3.model.Point + A specific point to condition on. + diag : bool + If `True`, return the diagonal instead of the full covariance + matrix. Default is `False`. + pred_noise : bool + Whether or not observation noise is included in the conditional. + Default is `False`. 
+ """ + mu, cov = self._build_conditional(Xnew, pred_noise, diag) + return draw_values([mu, cov], point=point) + + def predictt(self, Xnew, diag=False, pred_noise=False): + R""" + Return the mean vector and covariance matrix of the conditional + distribution as symbolic variables. + + Parameters + ---------- + Xnew : array-like + Function input values. If one-dimensional, must be a column + vector with shape `(n, 1)`. + diag : bool + If `True`, return the diagonal instead of the full covariance + matrix. Default is `False`. + pred_noise : bool + Whether or not observation noise is included in the conditional. + Default is `False`. + """ + mu, cov = self._build_conditional(Xnew, pred_noise, diag) + return mu, cov diff --git a/pymc3/math.py b/pymc3/math.py index 79825d70d1..32ed3d7865 100644 --- a/pymc3/math.py +++ b/pymc3/math.py @@ -18,10 +18,84 @@ import scipy.sparse from scipy.linalg import block_diag as scipy_block_diag from pymc3.theanof import floatX, largest_common_dtype, ix_ +from functools import reduce, partial # pylint: enable=unused-import +def kronecker(*Ks): + """Return the Kronecker product of arguments: + K_1 \otimes K_2 \otimes ... \otimes K_D + + Parameters + ---------- + Ks: 2D array-like + """ + return reduce(tt.slinalg.kron, Ks) + + +def cartesian(*arrays): + """Makes the Cartesian product of arrays. + + Parameters + ---------- + arrays: 1D array-like + 1D arrays where earlier arrays loop more slowly than later ones + """ + N = len(arrays) + return np.stack(np.meshgrid(*arrays, indexing='ij'), -1).reshape(-1, N) + + +def kron_matrix_op(krons, m, op): + """Apply op to krons and m in a way that reproduces op(kronecker(*krons), m) + + Parameters + ----------- + krons: list of square 2D array-like objects + D square matrices [A_1, A_2, ..., A_D] to be Kronecker'ed: + A = A_1 \otimes A_2 \otimes ... \otimes A_D + Product of column dimensions must be N + m : NxM array or 1D array (treated as Nx1) + Object that krons act upon + """ + def flat_matrix_op(flat_mat, mat): + Nmat = mat.shape[1] + flat_shape = flat_mat.shape + mat2 = flat_mat.reshape((Nmat, -1)) + return op(mat, mat2).T.reshape(flat_shape) + + def kron_vector_op(v): + return reduce(flat_matrix_op, krons, v) + + if m.ndim == 1: + m = m[:, None] # Treat 1D array as Nx1 matrix + if m.ndim != 2: # Has not been tested otherwise + raise ValueError('m must have ndim <= 2, not {}'.format(mat.ndim)) + m = m.T + res, _ = theano.scan(kron_vector_op, sequences=[m]) + return res.T + + +# Define kronecker functions that work on 1D and 2D arrays +kron_dot = partial(kron_matrix_op, op=tt.dot) +kron_solve_lower = partial(kron_matrix_op, op=tt.slinalg.solve_lower_triangular) + + +def flat_outer(a, b): + return tt.outer(a, b).ravel() + + +def kron_diag(*diags): + """Returns diagonal of a kronecker product. + + Parameters + ---------- + diags: 1D arrays + The diagonals of matrices that are to be Kroneckered + """ + return reduce(flat_outer, diags) + + def tround(*args, **kwargs): """ Temporary function to silence round warning in Theano. 
Please remove diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index f32e9b29c2..05b5aa5e9a 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -14,7 +14,8 @@ NegativeBinomial, Geometric, Exponential, ExGaussian, Normal, Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform, Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull, - Gumbel, Logistic, Interpolated, ZeroInflatedBinomial, HalfFlat, AR1) + Gumbel, Logistic, Interpolated, ZeroInflatedBinomial, HalfFlat, AR1, + KroneckerNormal) from ..distributions import continuous from pymc3.theanof import floatX from numpy import array, inf, log, exp @@ -28,7 +29,7 @@ import scipy.stats import theano import theano.tensor as tt - +from ..math import kronecker def get_lkj_cases(): """ @@ -264,6 +265,33 @@ def matrix_normal_logpdf_chol(value, mu, rowchol, colchol): np.dot(colchol, colchol.T)) +def kron_normal_logpdf_cov(value, mu, covs, sigma): + cov = kronecker(*covs).eval() + if sigma is not None: + cov += sigma**2 * np.eye(*cov.shape) + return scipy.stats.multivariate_normal.logpdf(value, mu, cov).sum() + + +def kron_normal_logpdf_chol(value, mu, chols, sigma): + covs = [np.dot(chol, chol.T) for chol in chols] + return kron_normal_logpdf_cov(value, mu, covs, sigma=sigma) + + +def kron_normal_logpdf_evd(value, mu, evds, sigma): + covs = [] + for eigs, Q in evds: + try: + eigs = eigs.eval() + except AttributeError: + pass + try: + Q = Q.eval() + except AttributeError: + pass + covs.append(np.dot(Q, np.dot(np.diag(eigs), Q.T))) + return kron_normal_logpdf_cov(value, mu, covs, sigma) + + def betafn(a): return floatX(scipy.special.gammaln(a).sum(-1) - scipy.special.gammaln(a.sum(-1))) @@ -373,15 +401,23 @@ def PdMatrixCholUpper(n): raise ValueError("n out of bounds") +def RandomPdMatrix(n): + A = np.random.rand(n, n) + return np.dot(A, A.T) + n * np.identity(n) + + class TestMatchesScipy(SeededTest): def pymc3_matches_scipy(self, pymc3_dist, domain, paramdomains, scipy_dist, - decimal=None, extra_args=None): + decimal=None, extra_args=None, scipy_args=None): if extra_args is None: extra_args = {} + if scipy_args is None: + scipy_args = {} model = build_model(pymc3_dist, domain, paramdomains, extra_args) value = model.named_vars['value'] def logp(args): + args.update(scipy_args) return scipy_dist(**args) self.check_logp(model, value, domain, paramdomains, logp, decimal=decimal) @@ -741,6 +777,50 @@ def test_matrixnormal(self, n): matrix_normal_logpdf_chol, decimal=select_by_precision(float64=6, float32=0)) + @pytest.mark.parametrize('n', [2, 3]) + @pytest.mark.parametrize('m', [3]) + @pytest.mark.parametrize('sigma', [None, 1.0]) + def test_kroneckernormal(self, n, m, sigma): + np.random.seed(5) + N = n*m + covs = [RandomPdMatrix(n), RandomPdMatrix(m)] + chols = list(map(np.linalg.cholesky, covs)) + evds = list(map(np.linalg.eigh, covs)) + dom = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + mu = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + + std_args = {'mu': mu} + cov_args = {'covs': covs} + chol_args = {'chols': chols} + evd_args = {'evds': evds} + if sigma is not None and sigma != 0: + std_args['sigma'] = Domain([sigma], edges=(None, None)) + else: + for args in [cov_args, chol_args, evd_args]: + args['sigma'] = sigma + + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_cov, + extra_args=cov_args, scipy_args=cov_args) + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, 
kron_normal_logpdf_chol, + extra_args=chol_args, scipy_args=chol_args) + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_evd, + extra_args=evd_args, scipy_args=evd_args) + + dom = Domain([np.random.randn(2, N)*0.1], edges=(None, None), shape=(2, N)) + + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_cov, + extra_args=cov_args, scipy_args=cov_args) + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_chol, + extra_args=chol_args, scipy_args=chol_args) + self.pymc3_matches_scipy( + KroneckerNormal, dom, std_args, kron_normal_logpdf_evd, + extra_args=evd_args, scipy_args=evd_args) + @pytest.mark.parametrize('n', [1, 2]) def test_mvt(self, n): self.pymc3_matches_scipy(MvStudentT, Vector(R, n), diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index b622b9b65d..10d731d4c4 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -13,16 +13,20 @@ from .test_distributions import ( build_model, Domain, product, R, Rplus, Rplusbig, Rplusdunif, Unit, Nat, NatSmall, I, Simplex, Vector, PdMatrix, - PdMatrixChol, PdMatrixCholUpper, RealMatrix + PdMatrixChol, PdMatrixCholUpper, RealMatrix, RandomPdMatrix ) def pymc3_random(dist, paramdomains, ref_rand, valuedomain=Domain([0]), - size=10000, alpha=0.05, fails=10, extra_args=None): + size=10000, alpha=0.05, fails=10, extra_args=None, + model_args=None): + if model_args is None: + model_args = {} model = build_model(dist, valuedomain, paramdomains, extra_args) domains = paramdomains.copy() for pt in product(domains, n_samples=100): pt = pm.Point(pt, model=model) + pt.update(model_args) p = alpha # Allow KS test to fail (i.e., the samples be different) # a certain number of times. Crude, but necessary. 
@@ -586,6 +590,54 @@ def ref_rand_uchol(size, mu, rowchol, colchol): # extra_args={'lower': False} # ) + def test_kronecker_normal(self): + def ref_rand(size, mu, covs, sigma): + cov = pm.math.kronecker(covs[0], covs[1]).eval() + cov += sigma**2 * np.identity(cov.shape[0]) + return st.multivariate_normal.rvs(mean=mu, cov=cov, size=size) + + def ref_rand_chol(size, mu, chols, sigma): + covs = [np.dot(chol, chol.T) for chol in chols] + return ref_rand(size, mu, covs, sigma) + + def ref_rand_evd(size, mu, evds, sigma): + covs = [] + for eigs, Q in evds: + covs.append(np.dot(Q, np.dot(np.diag(eigs), Q.T))) + return ref_rand(size, mu, covs, sigma) + + sizes = [2, 3] + sigmas = [0, 1] + for n, sigma in zip(sizes, sigmas): + N = n**2 + covs = [RandomPdMatrix(n), RandomPdMatrix(n)] + chols = list(map(np.linalg.cholesky, covs)) + evds = list(map(np.linalg.eigh, covs)) + dom = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + mu = Domain([np.random.randn(N)*0.1], edges=(None, None), shape=N) + + std_args = {'mu': mu} + cov_args = {'covs': covs} + chol_args = {'chols': chols} + evd_args = {'evds': evds} + if sigma is not None and sigma != 0: + std_args['sigma'] = Domain([sigma], edges=(None, None)) + else: + for args in [cov_args, chol_args, evd_args]: + args['sigma'] = sigma + + pymc3_random( + pm.KroneckerNormal, std_args, valuedomain=dom, + ref_rand=ref_rand, extra_args=cov_args, model_args=cov_args) + pymc3_random( + pm.KroneckerNormal, std_args, valuedomain=dom, + ref_rand=ref_rand_chol, extra_args=chol_args, + model_args=chol_args) + pymc3_random( + pm.KroneckerNormal, std_args, valuedomain=dom, + ref_rand=ref_rand_evd, extra_args=evd_args, + model_args=evd_args) + def test_mv_t(self): def ref_rand(size, nu, Sigma, mu): normal = st.multivariate_normal.rvs(cov=Sigma, size=size).T diff --git a/pymc3/tests/test_gp.py b/pymc3/tests/test_gp.py index c864da858b..1c9e474597 100644 --- a/pymc3/tests/test_gp.py +++ b/pymc3/tests/test_gp.py @@ -1,5 +1,6 @@ # pylint:disable=unused-variable from functools import reduce +from ..math import cartesian, kronecker from operator import add import pymc3 as pm import theano @@ -10,6 +11,7 @@ np.random.seed(101) + class TestZeroMean(object): def test_value(self): X = np.linspace(0, 1, 10)[:, None] @@ -222,6 +224,37 @@ def test_multiops(self): npt.assert_allclose(np.diag(K2), K1d, atol=1e-5) +class TestCovKron(object): + def test_symprod_cov(self): + X1 = np.linspace(0, 1, 10)[:, None] + X2 = np.linspace(0, 1, 10)[:, None] + X = cartesian(X1, X2) + with pm.Model() as model: + cov1 = pm.gp.cov.ExpQuad(1, 0.1) + cov2 = pm.gp.cov.ExpQuad(1, 0.1) + cov = pm.gp.cov.Kron([cov1, cov2]) + K = theano.function([], cov(X))() + npt.assert_allclose(K[0, 1], 1 * 0.53940, atol=1e-3) + npt.assert_allclose(K[0, 11], 0.53940 * 0.53940, atol=1e-3) + # check diagonal + Kd = theano.function([], cov(X, diag=True))() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + + def test_multiops(self): + X1 = np.linspace(0, 1, 3)[:, None] + X21 = np.linspace(0, 1, 5)[:, None] + X22 = np.linspace(0, 1, 4)[:, None] + X2 = cartesian(X21, X22) + X = cartesian(X1, X21, X22) + with pm.Model() as model: + cov1 = 3 + pm.gp.cov.ExpQuad(1, 0.1) + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1) + cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) + cov = pm.gp.cov.Kron([cov1, cov2]) + K_true = kronecker(theano.function([], cov1(X1))(), theano.function([], cov2(X2))()).eval() + K = theano.function([], cov(X))() + npt.assert_allclose(K_true, K) + + class TestCovSliceDim(object): 
def test_slice1(self): X = np.linspace(0, 1, 30).reshape(10, 3) @@ -538,6 +571,74 @@ def func_twoarg(x, a, b): assert func_twoarg(x, a, b) == func_twoarg2(x, args=(a, b)) +class TestCoregion(object): + def setup_method(self): + self.nrows = 6 + self.ncols = 3 + self.W = np.random.rand(self.nrows, self.ncols) + self.kappa = np.random.rand(self.nrows) + self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) + self.rand_rows = np.random.randint(0, self.nrows, size=(20, 1)) + self.rand_cols = np.random.randint(0, self.ncols, size=(10, 1)) + self.X = np.concatenate((self.rand_rows, np.random.rand(20, 1)), axis=1) + self.Xs = np.concatenate((self.rand_cols, np.random.rand(10, 1)), axis=1) + + def test_full(self): + B_mat = self.B[self.rand_rows, self.rand_rows.T] + with pm.Model() as model: + B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) + npt.assert_allclose( + B(np.array([[2, 1.5], [3, -42]])).eval(), + self.B[2:4, 2:4] + ) + npt.assert_allclose(B(self.X).eval(), B_mat) + + def test_fullB(self): + B_mat = self.B[self.rand_rows, self.rand_rows.T] + with pm.Model() as model: + B = pm.gp.cov.Coregion(1, B=self.B) + npt.assert_allclose( + B(np.array([[2], [3]])).eval(), + self.B[2:4, 2:4] + ) + npt.assert_allclose(B(self.X).eval(), B_mat) + + def test_Xs(self): + B_mat = self.B[self.rand_rows, self.rand_cols.T] + with pm.Model() as model: + B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) + npt.assert_allclose( + B(np.array([[2, 1.5]]), np.array([[3, -42]])).eval(), + self.B[2, 3] + ) + npt.assert_allclose(B(self.X, self.Xs).eval(), B_mat) + + def test_diag(self): + B_diag = np.diag(self.B)[self.rand_rows.ravel()] + with pm.Model() as model: + B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa, active_dims=[0]) + npt.assert_allclose( + B(np.array([[2, 1.5]]), diag=True).eval(), + np.diag(self.B)[2] + ) + npt.assert_allclose(B(self.X, diag=True).eval(), B_diag) + + def test_raises(self): + with pm.Model() as model: + with pytest.raises(ValueError): + B = pm.gp.cov.Coregion(2, W=self.W, kappa=self.kappa) + + def test_raises2(self): + with pm.Model() as model: + with pytest.raises(ValueError): + B = pm.gp.cov.Coregion(1, W=self.W, kappa=self.kappa, B=self.B) + + def test_raises3(self): + with pm.Model() as model: + with pytest.raises(ValueError): + B = pm.gp.cov.Coregion(1) + + class TestMarginalVsLatent(object): R""" Compare the logp of models Marginal, noise=0 and Latent. 
@@ -579,7 +680,7 @@ def testLatent2(self): chol = np.linalg.cholesky(cov_func(self.X).eval()) y_rotated = np.linalg.solve(chol, self.y - 0.5) latent_logp = model.logp({"f_rotated_": y_rotated, "p": self.pnew}) - npt.assert_allclose(latent_logp, self.logp, atol=0, rtol=1e-2) + npt.assert_allclose(latent_logp, self.logp, atol=5) class TestMarginalVsMarginalSparse(object): @@ -810,13 +911,59 @@ def testAdditiveTPRaises(self): gp1 + gp2 - - - - - - - - - - +class TestMarginalKron(object): + def setup_method(self): + self.Xs = [np.linspace(0, 1, 7)[:, None], + np.linspace(0, 1, 5)[:, None], + np.linspace(0, 1, 6)[:, None]] + self.X = cartesian(*self.Xs) + self.N = np.prod([len(X) for X in self.Xs]) + self.y = np.random.randn(self.N) * 0.1 + self.Xnews = [np.random.randn(5, 1), + np.random.randn(5, 1), + np.random.randn(5, 1)] + self.Xnew = np.concatenate(tuple(self.Xnews), axis=1) + self.sigma = 0.2 + self.pnew = np.random.randn(len(self.Xnew))*0.01 + ls = 0.2 + with pm.Model() as model: + self.cov_funcs = [pm.gp.cov.ExpQuad(1, ls), + pm.gp.cov.ExpQuad(1, ls), + pm.gp.cov.ExpQuad(1, ls)] + cov_func = pm.gp.cov.Kron(self.cov_funcs) + self.mean = pm.gp.mean.Constant(0.5) + gp = pm.gp.Marginal(mean_func=self.mean, cov_func=cov_func) + f = gp.marginal_likelihood("f", self.X, self.y, noise=self.sigma) + p = gp.conditional("p", self.Xnew) + self.mu, self.cov = gp.predict(self.Xnew) + self.logp = model.logp({"p": self.pnew}) + + def testMarginalKronvsMarginalpredict(self): + with pm.Model() as kron_model: + kron_gp = pm.gp.MarginalKron(mean_func=self.mean, + cov_funcs=self.cov_funcs) + f = kron_gp.marginal_likelihood('f', self.Xs, self.y, + sigma=self.sigma, shape=self.N) + p = kron_gp.conditional('p', self.Xnew) + mu, cov = kron_gp.predict(self.Xnew) + npt.assert_allclose(mu, self.mu, atol=0, rtol=1e-2) + npt.assert_allclose(cov, self.cov, atol=0, rtol=1e-2) + + def testMarginalKronvsMarginal(self): + with pm.Model() as kron_model: + kron_gp = pm.gp.MarginalKron(mean_func=self.mean, + cov_funcs=self.cov_funcs) + f = kron_gp.marginal_likelihood('f', self.Xs, self.y, + sigma=self.sigma, shape=self.N) + p = kron_gp.conditional('p', self.Xnew) + kron_logp = kron_model.logp({'p': self.pnew}) + npt.assert_allclose(kron_logp, self.logp, atol=0, rtol=1e-2) + + def testMarginalKronRaises(self): + with pm.Model() as kron_model: + gp1 = pm.gp.MarginalKron(mean_func=self.mean, + cov_funcs=self.cov_funcs) + gp2 = pm.gp.MarginalKron(mean_func=self.mean, + cov_funcs=self.cov_funcs) + with pytest.raises(TypeError): + gp1 + gp2 diff --git a/pymc3/tests/test_math.py b/pymc3/tests/test_math.py index efcbe17eef..b231b08127 100644 --- a/pymc3/tests/test_math.py +++ b/pymc3/tests/test_math.py @@ -5,12 +5,79 @@ from theano.tests import unittest_tools as utt from pymc3.math import ( LogDet, logdet, probit, invprobit, expand_packed_triangular, - log1pexp, log1mexp) + log1pexp, log1mexp, kronecker, cartesian, kron_dot, kron_solve_lower) from .helpers import SeededTest import pytest from pymc3.theanof import floatX +def test_kronecker(): + np.random.seed(1) + # Create random matrices + [a, b, c] = [np.random.rand(3, 3+i) for i in range(3)] + + custom = kronecker(a, b, c) # Custom version + nested = tt.slinalg.kron(a, tt.slinalg.kron(b, c)) + np.testing.assert_array_almost_equal( + custom.eval(), + nested.eval() # Standard nested version + ) + + +def test_cartesian(): + np.random.seed(1) + a = [1, 2, 3] + b = [0, 2] + c = [5, 6] + manual_cartesian = np.array( + [[1, 0, 5], + [1, 0, 6], + [1, 2, 5], + [1, 2, 6], + [2, 0, 5], + 
[2, 0, 6], + [2, 2, 5], + [2, 2, 6], + [3, 0, 5], + [3, 0, 6], + [3, 2, 5], + [3, 2, 6], + ] + ) + auto_cart = cartesian(a, b, c) + np.testing.assert_array_almost_equal(manual_cartesian, auto_cart) + + +def test_kron_dot(): + np.random.seed(1) + # Create random matrices + Ks = [np.random.rand(3, 3) for i in range(3)] + # Create random vector with correct shape + tot_size = np.prod([k.shape[1] for k in Ks]) + x = np.random.rand(tot_size).reshape((tot_size, 1)) + # Construct entire kronecker product then multiply + big = kronecker(*Ks) + slow_ans = tt.dot(big, x) + # Use tricks to avoid construction of entire kronecker product + fast_ans = kron_dot(Ks, x) + np.testing.assert_array_almost_equal(slow_ans.eval(), fast_ans.eval()) + + +def test_kron_solve_lower(): + np.random.seed(1) + # Create random matrices + Ls = [np.tril(np.random.rand(3, 3)) for i in range(3)] + # Create random vector with correct shape + tot_size = np.prod([L.shape[1] for L in Ls]) + x = np.random.rand(tot_size).reshape((tot_size, 1)) + # Construct entire kronecker product then solve + big = kronecker(*Ls) + slow_ans = tt.slinalg.solve_lower_triangular(big, x) + # Use tricks to avoid construction of entire kronecker product + fast_ans = kron_solve_lower(Ls, x) + np.testing.assert_array_almost_equal(slow_ans.eval(), fast_ans.eval()) + + def test_probit(): p = np.array([0.01, 0.25, 0.5, 0.75, 0.99]) np.testing.assert_allclose(invprobit(probit(p)).eval(), p, atol=1e-5)
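Reviewer note (not part of the patch): the following is a minimal end-to-end sketch of the new `MarginalKron` API, assembled from the docstring examples in this diff. The priors, lengthscales, grid sizes, and toy data are illustrative assumptions only.

.. code:: python

    import numpy as np
    import pymc3 as pm

    # Two 1D input grids; the model is observed on their full Cartesian product.
    X1 = np.linspace(0, 1, 20)[:, None]
    X2 = np.linspace(0, 2, 10)[:, None]
    Xs = [X1, X2]
    N = len(X1) * len(X2)
    y = np.random.randn(N) * 0.1  # toy observations on the grid

    with pm.Model() as model:
        # One covariance per grid dimension; MarginalKron never forms the
        # full Kronecker product covariance explicitly.
        ls1 = pm.Gamma('ls1', alpha=2, beta=2)
        ls2 = pm.Gamma('ls2', alpha=2, beta=2)
        cov1 = pm.gp.cov.ExpQuad(1, ls=ls1)
        cov2 = pm.gp.cov.ExpQuad(1, ls=ls2)

        gp = pm.gp.MarginalKron(cov_funcs=[cov1, cov2])
        sigma = pm.HalfCauchy('sigma', beta=3)
        y_ = gp.marginal_likelihood('y', Xs=Xs, y=y, sigma=sigma)

        trace = pm.sample(500, tune=500)

    # Predict at new points, which need not lie on a full grid.
    Xnew = pm.math.cartesian(np.linspace(0, 1, 5)[:, None],
                             np.linspace(0, 2, 5)[:, None])
    with model:
        fcond = gp.conditional('fcond', Xnew=Xnew)
        mu, cov = gp.predict(Xnew, point=trace[-1])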
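Similarly, a small sketch of the new `Coregion` covariance, mirroring the tests above; `W`, `kappa`, and the index column values are placeholders chosen for illustration.

.. code:: python

    import numpy as np
    import theano
    import pymc3 as pm

    # Coregionalization matrix B = W W^T + diag(kappa) over 3 outputs.
    W = np.random.rand(3, 2)
    kappa = np.random.rand(3)

    # The active column holds integer output indices; the second column of
    # real-valued inputs is ignored because active_dims=[0].
    B = pm.gp.cov.Coregion(input_dim=2, W=W, kappa=kappa, active_dims=[0])
    X = np.array([[0, 0.1], [1, -0.3], [2, 2.2], [1, 0.0]])
    K = theano.function([], B(X))()  # K[i, j] == B_full[X[i, 0], X[j, 0]]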