Skip to content

Commit a3af00d

Browse files
author
Junpeng Lao
authored
Merge pull request #2731 from jordan-melendez/KroneckerGP
Kronecker GP
2 parents 77afa45 + ece6ee8 commit a3af00d

File tree

12 files changed

+1149
-23
lines changed

12 files changed

+1149
-23
lines changed

RELEASE-NOTES.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@
55
### New features
66

77
- Add `logit_p` keyword to `pm.Bernoulli`, so that users can specify the logit of the success probability. This is faster and more stable than using `p=tt.nnet.sigmoid(logit_p)`.
8-
- Add `random` keyword to `pm.DensityDist` thus enabling users to pass custom random method which in turn makes sampling from a `DensityDist` possible.
8+
- Add `random` keyword to `pm.DensityDist` thus enabling users to pass custom random method which in turn makes sampling from a `DensityDist` possible.
99
- Effective sample size computation is updated. The estimation uses Geyer's initial positive sequence, which no longer truncates the autocorrelation series inaccurately. `pm.diagnostics.effective_n` can now report N_eff > N.
10+
- Added `KroneckerNormal` distribution and a corresponding `MarginalKron`
11+
Gaussian Process implementation for efficient inference, along with
12+
lower-level functions such as `cartesian` and `kronecker` products.
13+
- Added `Coregion` covariance function.
1014

1115

1216
### Fixes
@@ -15,12 +19,14 @@
1519
- The bandwidth for KDE plots is computed using a modified version of Scott's rule. The new version uses entropy instead of standard
1620
deviation. This works better for multimodal distributions. Functions using KDE plots have a new argument `bw` controlling the bandwidth.
1721

22+
1823
### Deprecations
1924

2025
- DIC and BPIC calculations have been removed
2126
- `njobs` and `nchains` kwarg are deprecated in favor of `cores` and `chains` for `sample`
2227
- `lag` kwarg in `pm.stats.autocorr` and `pm.stats.autocov` is deprecated.
2328

29+
2430
## PyMC 3.3 (January 9, 2018)
2531

2632
### New features

pymc3/distributions/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757

5858
from .multivariate import MvNormal
5959
from .multivariate import MatrixNormal
60+
from .multivariate import KroneckerNormal
6061
from .multivariate import MvStudentT
6162
from .multivariate import Dirichlet
6263
from .multivariate import Multinomial
@@ -123,6 +124,7 @@
123124
'TensorType',
124125
'MvNormal',
125126
'MatrixNormal',
127+
'KroneckerNormal',
126128
'MvStudentT',
127129
'Dirichlet',
128130
'Multinomial',

pymc3/distributions/dist_math.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
from .special import gammaln
1414
from pymc3.theanof import floatX
1515

16+
from six.moves import xrange
17+
from functools import partial
18+
1619
f = floatX
1720
c = - .5 * np.log(2. * np.pi)
1821

@@ -326,3 +329,135 @@ def grad(self, inputs, grads):
326329
x_grad, = grads
327330

328331
return [x_grad * self.grad_op(x)]
332+
333+
334+
# Custom Eigh, EighGrad, and eigh are required until
335+
# https://github.com/Theano/Theano/pull/6557 is handled, since lambdas
336+
# cannot be used with pickling.
337+
class Eigh(tt.nlinalg.Eig):
    """
    Eigen-decomposition op for Hermitian or symmetric matrices.

    Temporary local copy of theano's ``Eigh``. The only difference is that
    its gradient is delegated to the local ``EighGrad``, which is built with
    ``functools.partial`` rather than lambdas. Remove this class once the
    corresponding change has been merged into theano.
    """

    _numop = staticmethod(np.linalg.eigh)
    __props__ = ('UPLO',)

    def __init__(self, UPLO='L'):
        """Store which triangle (``'L'`` or ``'U'``) is passed to ``eigh``."""
        assert UPLO in ('L', 'U')
        self.UPLO = UPLO

    def make_node(self, x):
        """Build the Apply node: one matrix input, outputs ``(w, v)``."""
        matrix = tt.as_tensor_variable(x)
        assert matrix.ndim == 2
        # The precision of the eigenvalues returned by numpy's eigh may be
        # single or double depending on the installed LAPACK. Instead of
        # replicating that logic, run eigh on a trivial 1x1 input of the
        # same dtype and read the eigenvalue dtype off the result.
        probe = [[np.dtype(matrix.dtype).type()]]
        eigval_dtype = self._numop(probe)[0].dtype.name
        eigvals = theano.tensor.vector(dtype=eigval_dtype)
        eigvecs = theano.tensor.matrix(dtype=matrix.dtype)
        return theano.gof.Apply(self, [matrix], [eigvals, eigvecs])

    def perform(self, node, inputs, outputs):
        """Run ``np.linalg.eigh`` on the input and fill both output cells."""
        [matrix] = inputs
        [eigval_cell, eigvec_cell] = outputs
        eigval_cell[0], eigvec_cell[0] = self._numop(matrix, self.UPLO)

    def grad(self, inputs, g_outputs):
        r"""Symbolic gradient of the eigensystem with respect to the input.

        ``g_outputs`` holds the incoming gradients with respect to the
        eigenvalues and the eigenvectors. Disconnected entries are replaced
        by zeros, and the actual computation is handed to ``EighGrad``
        constructed with the same ``UPLO`` setting.

        Returns a one-element list containing the gradient with respect to
        the input matrix.
        """
        [matrix] = inputs
        eigvals, eigvecs = self(matrix)
        # Gradients wrt disconnected variables are swapped for zeros; this
        # is a work-around for issue #1063.
        g_eigvals, g_eigvecs = tt.nlinalg._zero_disconnected(
            [eigvals, eigvecs], g_outputs)
        grad_op = EighGrad(self.UPLO)
        return [grad_op(matrix, eigvals, eigvecs, g_eigvals, g_eigvecs)]
393+
394+
395+
class EighGrad(theano.Op):
    """
    Gradient op for the eigensystem of a Hermitian matrix.

    Temporary local copy of theano's ``EighGrad``; the triangle helpers are
    created with ``functools.partial`` instead of lambdas. Remove this class
    once the corresponding change has been merged into theano.
    """

    __props__ = ('UPLO',)

    def __init__(self, UPLO='L'):
        """Select the triangle helpers matching ``UPLO`` (``'L'``/``'U'``)."""
        assert UPLO in ('L', 'U')
        self.UPLO = UPLO
        if UPLO == 'L':
            # Keep the lower triangle (with diagonal); fold the strict upper.
            self.tri0 = np.tril
            self.tri1 = partial(np.triu, k=1)
        else:
            # Keep the upper triangle (with diagonal); fold the strict lower.
            self.tri0 = np.triu
            self.tri1 = partial(np.tril, k=-1)

    def make_node(self, x, w, v, gw, gv):
        """Build the Apply node with a single matrix output.

        ``x``, ``v`` and ``gv`` must be 2-d; ``w`` and ``gw`` must be 1-d.
        The output dtype is the upcast of all five input dtypes.
        """
        x, w, v, gw, gv = [tt.as_tensor_variable(arg)
                           for arg in (x, w, v, gw, gv)]
        expected_ndim = ((x, 2), (w, 1), (v, 2), (gw, 1), (gv, 2))
        for variable, ndim in expected_ndim:
            assert variable.ndim == ndim
        result_dtype = theano.scalar.upcast(
            x.dtype, w.dtype, v.dtype, gw.dtype, gv.dtype)
        result = theano.tensor.matrix(dtype=result_dtype)
        return theano.gof.Apply(self, [x, w, v, gw, gv], [result])

    def perform(self, node, inputs, outputs):
        """
        Compute the "reverse-mode" gradient for the eigensystem of a
        square matrix.

        ``inputs`` is ``(x, w, v, W, V)``: the matrix, its eigenvalues and
        eigenvectors, and the incoming gradients with respect to the
        eigenvalues (``W``) and eigenvectors (``V``).
        """
        x, w, v, W, V = inputs
        size = x.shape[0]

        def eigenvector_part(n):
            # Sum over every other eigenpair m != n; each term is divided
            # by the eigenvalue gap w[n] - w[m].
            V_col = V.T[n]
            acc = 0
            for m in xrange(size):
                if m == n:
                    continue
                v_m = v[:, m]
                acc = acc + v_m * V_col.dot(v_m) / (w[n] - w[m])
            return acc

        g = 0
        for n in xrange(size):
            v_n = v[:, n]
            g = g + np.outer(v_n, v_n * W[n] + eigenvector_part(n))

        # Numpy's eigh(a, 'L') (eigh(a, 'U')) depends on tril(a) (triu(a))
        # only, so its partial derivative with respect to a[i, j] vanishes
        # for i < j (i > j). The non-zero entries must still account for a
        # variation of the opposite triangle changing two elements of the
        # Hermitian (symmetric) matrix, hence the transposed strict
        # triangle is added onto the kept one.
        folded = self.tri0(g) + self.tri1(g).T

        # Cast explicitly so the declared dtype is returned even when NumPy
        # upcast inside self.tri0.
        outputs[0][0] = np.asarray(folded, dtype=node.outputs[0].dtype)

    def infer_shape(self, node, shapes):
        """The output has the same shape as the first input (the matrix)."""
        matrix_shape = shapes[0]
        return [matrix_shape]
459+
460+
461+
def eigh(a, UPLO='L'):
    """Apply the local ``Eigh`` op to ``a``.

    A copy of theano's helper; remove together with ``Eigh`` and
    ``EighGrad`` when possible.
    """
    op = Eigh(UPLO)
    return op(a)

0 commit comments

Comments
 (0)