Kronecker GP #2731

Merged 15 commits on Feb 15, 2018
8 changes: 7 additions & 1 deletion RELEASE-NOTES.md
@@ -5,20 +5,26 @@
### New features

- Add `logit_p` keyword to `pm.Bernoulli`, so that users can specify the logit of the success probability. This is faster and more stable than using `p=tt.nnet.sigmoid(logit_p)`.
- Add `random` keyword to `pm.DensityDist`, so that users can pass a custom random method, which in turn makes sampling from a `DensityDist` possible.
- Effective sample size computation is updated. The estimation now uses Geyer's initial positive sequence, so the autocorrelation series is no longer truncated inaccurately. `pm.diagnostics.effective_n` can now report N_eff > N.
- Added `KroneckerNormal` distribution and a corresponding `MarginalKron`
Gaussian Process implementation for efficient inference, along with
lower-level functions such as `cartesian` and `kronecker` products.
- Added `Coregion` covariance function.
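
For the `KroneckerNormal`/`MarginalKron` entries above, a minimal sketch of the intended usage may help. The keyword names (`cov_funcs`, `Xs`, `sigma`) reflect my reading of this PR and could differ from the merged API:

```python
import numpy as np
import pymc3 as pm

# Observations live on the Cartesian product of two 1-D grids, so the
# full covariance is a Kronecker product of two small matrices, which
# MarginalKron exploits for efficient inference.
X1 = np.linspace(0, 1, 10)[:, None]
X2 = np.linspace(0, 2, 15)[:, None]
y = np.random.randn(10 * 15)            # stand-in data on the 10 x 15 grid

with pm.Model() as model:
    ls1 = pm.Gamma('ls1', alpha=2, beta=2)
    ls2 = pm.Gamma('ls2', alpha=2, beta=2)
    cov1 = pm.gp.cov.ExpQuad(1, ls=ls1)  # one covariance function per factor
    cov2 = pm.gp.cov.ExpQuad(1, ls=ls2)

    gp = pm.gp.MarginalKron(cov_funcs=[cov1, cov2])
    sigma = pm.HalfNormal('sigma', sd=1)
    y_ = gp.marginal_likelihood('y', Xs=[X1, X2], y=y, sigma=sigma)

    trace = pm.sample(500, tune=500)
```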


### Fixes

- `VonMises` no longer overflows for large values of kappa. `i0` and `i1` have been removed; `log_i0` is now used to compute the logp.


### Deprecations

- DIC and BPIC calculations have been removed.
- `njobs` and `nchains` kwargs are deprecated in favor of `cores` and `chains` for `sample`.
- `lag` kwarg in `pm.stats.autocorr` and `pm.stats.autocov` is deprecated.


## PyMC 3.3 (January 9, 2018)

### New features
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
@@ -57,6 +57,7 @@

from .multivariate import MvNormal
from .multivariate import MatrixNormal
from .multivariate import KroneckerNormal
from .multivariate import MvStudentT
from .multivariate import Dirichlet
from .multivariate import Multinomial
@@ -123,6 +124,7 @@
'TensorType',
'MvNormal',
'MatrixNormal',
'KroneckerNormal',
'MvStudentT',
'Dirichlet',
'Multinomial',
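
With the export above, the new distribution is reachable directly from the package namespace. A small, hedged sketch; the `mu`/`covs`/`shape` keywords are my assumption about the constructor signature:

```python
import numpy as np
import pymc3 as pm

# Two covariance factors; the joint covariance is kron(K1, K2),
# so the distribution is 3 * 2 = 6 dimensional.
K1 = np.array([[1.0, 0.5, 0.2],
               [0.5, 1.0, 0.5],
               [0.2, 0.5, 1.0]])
K2 = np.array([[1.0, 0.3],
               [0.3, 1.0]])

with pm.Model() as model:
    x = pm.KroneckerNormal('x', mu=np.zeros(6), covs=[K1, K2], shape=6)

# Evaluating the joint log-density at the test point checks that the
# Kronecker-structured covariance was assembled without error.
print(model.logp(model.test_point))
```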
135 changes: 135 additions & 0 deletions pymc3/distributions/dist_math.py
@@ -13,6 +13,9 @@
from .special import gammaln
from pymc3.theanof import floatX

from six.moves import xrange
from functools import partial

f = floatX
c = - .5 * np.log(2. * np.pi)

@@ -326,3 +329,135 @@ def grad(self, inputs, grads):
x_grad, = grads

return [x_grad * self.grad_op(x)]


# Custom Eigh, EighGrad, and eigh are required until
# https://github.com/Theano/Theano/pull/6557 is handled, since lambdas
# cannot be pickled.
class Eigh(tt.nlinalg.Eig):
"""
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.

This is a copy of Eigh from theano that calls an EighGrad which uses
partial instead of lambda. Once this has been merged with theano this
should be removed.
"""

_numop = staticmethod(np.linalg.eigh)
__props__ = ('UPLO',)

def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO

def make_node(self, x):
x = tt.as_tensor_variable(x)
assert x.ndim == 2
# Numpy's linalg.eigh may return either double or single
# precision eigenvalues depending on installed version of
# LAPACK. Rather than trying to reproduce the (rather
# involved) logic, we just probe linalg.eigh with a trivial
# input.
w_dtype = self._numop([[np.dtype(x.dtype).type()]])[0].dtype.name
w = theano.tensor.vector(dtype=w_dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return theano.gof.Apply(self, [x], [w, v])

def perform(self, node, inputs, outputs):
(x,) = inputs
(w, v) = outputs
w[0], v[0] = self._numop(x, self.UPLO)

def grad(self, inputs, g_outputs):
r"""The gradient function should return
.. math:: \sum_n\left(W_n\frac{\partial\,w_n}
{\partial a_{ij}} +
\sum_k V_{nk}\frac{\partial\,v_{nk}}
{\partial a_{ij}}\right),
where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
:math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.
Analytic formulae for eigensystem gradients are well-known in
perturbation theory:
.. math:: \frac{\partial\,w_n}
{\partial a_{ij}} = v_{in}\,v_{jn}
.. math:: \frac{\partial\,v_{kn}}
{\partial a_{ij}} =
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
"""
x, = inputs
w, v = self(x)
# Replace gradients wrt disconnected variables with
# zeros. This is a work-around for issue #1063.
gw, gv = tt.nlinalg._zero_disconnected([w, v], g_outputs)
return [EighGrad(self.UPLO)(x, w, v, gw, gv)]


class EighGrad(theano.Op):
"""
Gradient of an eigensystem of a Hermitian matrix.

This is a copy of EighGrad from theano that uses partial instead of lambda.
Once this has been merged with theano this should be removed.
"""

__props__ = ('UPLO',)

def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
if UPLO == 'L':
self.tri0 = np.tril
self.tri1 = partial(np.triu, k=1)
else:
self.tri0 = np.triu
self.tri1 = partial(np.tril, k=-1)

def make_node(self, x, w, v, gw, gv):
x, w, v, gw, gv = map(tt.as_tensor_variable, (x, w, v, gw, gv))
assert x.ndim == 2
assert w.ndim == 1
assert v.ndim == 2
assert gw.ndim == 1
assert gv.ndim == 2
out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
gw.dtype, gv.dtype)
out = theano.tensor.matrix(dtype=out_dtype)
return theano.gof.Apply(self, [x, w, v, gw, gv], [out])

def perform(self, node, inputs, outputs):
"""
Implements the "reverse-mode" gradient for the eigensystem of
a square matrix.
"""
x, w, v, W, V = inputs
N = x.shape[0]
outer = np.outer

def G(n):
return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
for m in xrange(N) if m != n)

g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
for n in xrange(N))

# Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
# (triu(a)) only. This means that partial derivative of
# eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
# for i < j (i > j). At the same time, non-zero components of
# the gradient must account for the fact that variation of the
# opposite triangle contributes to variation of two elements
# of Hermitian (symmetric) matrix. The following line
# implements the necessary logic.
out = self.tri0(g) + self.tri1(g).T

# Make sure we return the right dtype even if NumPy performed
# upcasting in self.tri0.
outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)

def infer_shape(self, node, shapes):
return [shapes[0]]


def eigh(a, UPLO='L'):
"""A copy, remove with Eigh and EighGrad when possible"""
return Eigh(UPLO)(a)
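
The custom `Eigh`/`EighGrad` pair above exists only so the op can be pickled, so its gradient should agree with a numerical check. A small sketch of how that could be verified (it assumes `eigh` is imported from `pymc3.distributions.dist_math` once this PR is merged):

```python
import numpy as np
import theano
from pymc3.distributions.dist_math import eigh

rng = np.random.RandomState(42)
A = rng.randn(5, 5)
A = 0.5 * (A + A.T)  # symmetrize, as eigh assumes a symmetric input

# verify_grad compares EighGrad's analytic gradient with a finite-difference
# approximation; summing the eigenvalues gives a scalar cost to differentiate.
theano.gradient.verify_grad(lambda a: eigh(a)[0].sum(), [A], rng=rng)
```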