|
13 | 13 | from .special import gammaln
|
14 | 14 | from pymc3.theanof import floatX
|
15 | 15 |
|
| 16 | +from six.moves import xrange |
| 17 | +from functools import partial |
| 18 | + |
16 | 19 | f = floatX
|
17 | 20 | c = - .5 * np.log(2. * np.pi)
|
18 | 21 |
|
@@ -326,3 +329,135 @@ def grad(self, inputs, grads):
|
326 | 329 | x_grad, = grads
|
327 | 330 |
|
328 | 331 | return [x_grad * self.grad_op(x)]
|
| 332 | + |
| 333 | + |
| 334 | +# Custom Eigh, EighGrad, and eigh are required until |
| 335 | +# https://github.com/Theano/Theano/pull/6557 is handled, since lambda's |
| 336 | +# cannot be used with pickling. |
class Eigh(tt.nlinalg.Eig):
    """
    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.

    This is a copy of Eigh from theano that calls an EighGrad which uses
    partial instead of lambda. Once this has been merged with theano this
    should be removed.
    """

    _numop = staticmethod(np.linalg.eigh)
    __props__ = ('UPLO',)

    def __init__(self, UPLO='L'):
        assert UPLO in ['L', 'U']
        self.UPLO = UPLO

    def make_node(self, x):
        x = tt.as_tensor_variable(x)
        assert x.ndim == 2
        # Depending on the installed LAPACK, numpy's linalg.eigh may
        # return either single- or double-precision eigenvalues.  Rather
        # than reproducing that (rather involved) logic, probe
        # linalg.eigh with a trivial input and read the dtype back.
        probe = self._numop([[np.dtype(x.dtype).type()]])
        eigvals = theano.tensor.vector(dtype=probe[0].dtype.name)
        eigvecs = theano.tensor.matrix(dtype=x.dtype)
        return theano.gof.Apply(self, [x], [eigvals, eigvecs])

    def perform(self, node, inputs, outputs):
        x, = inputs
        w_out, v_out = outputs
        w_out[0], v_out[0] = self._numop(x, self.UPLO)

    def grad(self, inputs, g_outputs):
        r"""The gradient function should return
        .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
                              {\partial a_{ij}} +
                  \sum_k V_{nk}\frac{\partial\,v_{nk}}
                              {\partial a_{ij}}\right),
        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
        :math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.
        Analytic formulae for eigensystem gradients are well-known in
        perturbation theory:
        .. math:: \frac{\partial\,w_n}
                       {\partial a_{ij}} = v_{in}\,v_{jn}
        .. math:: \frac{\partial\,v_{kn}}
                       {\partial a_{ij}} =
                  \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
        """
        a, = inputs
        w, v = self(a)
        # Replace gradients wrt disconnected variables with zeros
        # (work-around for theano issue #1063).
        gw, gv = tt.nlinalg._zero_disconnected([w, v], g_outputs)
        return [EighGrad(self.UPLO)(a, w, v, gw, gv)]
| 393 | + |
| 394 | + |
class EighGrad(theano.Op):
    """
    Gradient of an eigensystem of a Hermitian matrix.

    This is a copy of EighGrad from theano that uses partial instead of
    lambda. Once this has been merged with theano this should be removed.
    """

    __props__ = ('UPLO',)

    def __init__(self, UPLO='L'):
        assert UPLO in ['L', 'U']
        self.UPLO = UPLO
        # partial (rather than a lambda) keeps this op picklable.
        if UPLO == 'L':
            self.tri0, self.tri1 = np.tril, partial(np.triu, k=1)
        else:
            self.tri0, self.tri1 = np.triu, partial(np.tril, k=-1)

    def make_node(self, x, w, v, gw, gv):
        x, w, v, gw, gv = map(tt.as_tensor_variable, (x, w, v, gw, gv))
        assert x.ndim == 2
        assert w.ndim == 1
        assert v.ndim == 2
        assert gw.ndim == 1
        assert gv.ndim == 2
        out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
                                         gw.dtype, gv.dtype)
        out = theano.tensor.matrix(dtype=out_dtype)
        return theano.gof.Apply(self, [x, w, v, gw, gv], [out])

    def perform(self, node, inputs, outputs):
        """
        Implements the "reverse-mode" gradient for the eigensystem of
        a square matrix.
        """
        x, w, v, W, V = inputs
        n_eig = x.shape[0]

        def off_diag(n):
            # sum over m != n of v[:, m] * (V[:, n] . v[:, m]) / (w[n] - w[m])
            return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
                       for m in xrange(n_eig) if m != n)

        g = sum(np.outer(v[:, n], v[:, n] * W[n] + off_diag(n))
                for n in xrange(n_eig))

        # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
        # (triu(a)) only.  This means that the partial derivative of
        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
        # for i < j (i > j).  At the same time, non-zero components of
        # the gradient must account for the fact that variation of the
        # opposite triangle contributes to variation of two elements of
        # the Hermitian (symmetric) matrix.  The following line folds
        # the opposite triangle back accordingly.
        out = self.tri0(g) + self.tri1(g).T

        # Make sure we return the right dtype even if NumPy performed
        # upcasting in self.tri0.
        outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)

    def infer_shape(self, node, shapes):
        return [shapes[0]]
| 459 | + |
| 460 | + |
def eigh(a, UPLO='L'):
    """A copy, remove with Eigh and EighGrad when possible"""
    op = Eigh(UPLO)
    return op(a)
0 commit comments