SparseLatent GP #2951
@@ -1,4 +1,4 @@
 from . import cov
 from . import mean
 from . import util
-from .gp import Latent, Marginal, MarginalSparse, TP, MarginalKron
+from .gp import Latent, LatentSparse, Marginal, MarginalSparse, TP, MarginalKron
@@ -7,7 +7,8 @@
 from pymc3.gp.cov import Covariance, Constant
 from pymc3.gp.mean import Zero
 from pymc3.gp.util import (conditioned_vars,
-                           infer_shape, stabilize, cholesky, solve_lower, solve_upper)
+                           infer_shape, stabilize, cholesky, solve_lower, solve_upper,
+                           invert_dot, project_inverse)
 from pymc3.distributions import draw_values
 from pymc3.distributions.dist_math import eigh
 from ..math import cartesian, kron_dot, kron_diag
@@ -207,6 +208,223 @@ def conditional(self, name, Xnew, given=None, **kwargs):
        return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs)
@conditioned_vars(["X", "Xu", "f"])
class LatentSparse(Latent):
    R"""
    Approximate latent Gaussian process (GP).

    The `gp.LatentSparse` class is a sparse approximation of a latent GP.
    No additive noise is assumed. It is called "Latent" because the
    underlying function values are treated as latent variables. It has a
    `prior` method and a `conditional` method. Given a mean and covariance
    function, the function :math:`f(X)` is modeled as,

    .. math::

       f \sim \int p(f \mid u) p(u) \mathrm{d}u

    where :math:`u` is the GP function prior at a set of inducing points `Xu`,

    .. math::

       u \mid X_u \sim \text{MvNormal}\left( \mu(X_u), K(X_u, X_u) \right)

    The DTC and FITC approximations replace the true conditional
    :math:`p(f \mid u)` with a simplified :math:`q(f \mid u)`; the
    resulting `f` is a GP prior only in the case of the FITC approximation.

    Use the `prior` and `conditional` methods to actually construct random
    variables representing the unknown, or latent, function whose
    distribution is the GP prior or GP conditional. This GP implementation
    can be used to implement regression on data that is not normally
    distributed. For more information on the `prior` and `conditional`
    methods, see their docstrings.

    Parameters
    ----------
    cov_func : None, 2D array, or instance of Covariance
        The covariance function. Defaults to zero. Must implement the
        `diag` method for the FITC approximation.
    mean_func : None, instance of Mean
        The mean function. Defaults to zero.
    approx : str
        The approximation to use. One of ['DTC', 'FITC'].

    Examples
    --------
    .. code:: python

        # A one dimensional column vector of inputs.
        X = np.linspace(0, 1, 10)[:, None]

        # A smaller set of inducing inputs
        Xu = np.linspace(0, 1, 5)[:, None]

        with pm.Model() as model:
            # Specify the covariance function.
            cov_func = pm.gp.cov.ExpQuad(1, ls=0.1)

            # Specify the GP. The default mean function is `Zero`.
            gp = pm.gp.LatentSparse(cov_func=cov_func)

            # Place a GP prior over the function f at the points X,
            # with inducing points Xu.
            f = gp.prior("f", X=X, Xu=Xu)

        ...

        # After fitting or sampling, specify the distribution
        # at new points with .conditional
        Xnew = np.linspace(-1, 2, 50)[:, None]

        with model:
            fcond = gp.conditional("fcond", Xnew=Xnew)
    """

    _available_approx = ("DTC", "FITC")
    def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), approx="FITC"):
        if approx not in self._available_approx:
            raise NotImplementedError(approx)
        self.approx = approx
        super(LatentSparse, self).__init__(mean_func, cov_func)
    def _build_prior(self, name, X, Xu, **kwargs):
        mu = self.mean_func(X)                        # (n,)
        Luu = cholesky(stabilize(self.cov_func(Xu)))  # (m, m), Cholesky factor of K(Xu, Xu)
        shape = infer_shape(Xu, kwargs.pop("shape", None))
        v = pm.Normal(name + "_u_rotated_", mu=0.0, sd=1.0, shape=shape, **kwargs)
        u_ = self.mean_func(Xu) + tt.dot(Luu, v)      # mean + chol parameterization of MvNormal
        u = pm.Deterministic(name + "_u", u_)         # (m,), prior at the inducing points
        Kuuiu = invert_dot(Luu, u)                    # (m,), K_{uu}^{-1} u
        Kfu = self.cov_func(X, Xu)                    # (n, m)
        f_ = mu + tt.dot(Kfu, Kuuiu)                  # (n, m) @ (m,) = (n,)
        if self.approx == 'DTC':
            f = pm.Deterministic(name, f_)
        elif self.approx == 'FITC':
            Qff_diag = project_inverse(Kfu, Luu, diag=True)
            Kff_diag = self.cov_func.diag(X)
            # MvNormal with a diagonal covariance is Normal with sd = var**0.5
            var = tt.clip(Kff_diag - Qff_diag, 0, np.inf)
            f = pm.Normal(name, mu=f_, tau=tt.inv(var),
                          shape=infer_shape(X))  # f has one value per row of X, not Xu
        return f
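For intuition, here is a minimal NumPy sketch of what `_build_prior` computes, not part of the PR: the helper name, the callable `cov`/`mean` arguments, and the jitter value are all illustrative assumptions.

```python
import numpy as np

def sparse_prior_sketch(mean, cov, X, Xu, v, approx="FITC", rng=None):
    """Map whitened draws v ~ N(0, I) to f at X, mirroring _build_prior."""
    rng = np.random.default_rng() if rng is None else rng
    Luu = np.linalg.cholesky(cov(Xu, Xu) + 1e-6 * np.eye(len(Xu)))  # stabilized
    u = mean(Xu) + Luu @ v                                   # u ~ MvNormal(mu(Xu), Kuu)
    Kuuiu = np.linalg.solve(Luu.T, np.linalg.solve(Luu, u))  # K_uu^{-1} u
    Kfu = cov(X, Xu)
    f_mean = mean(X) + Kfu @ Kuuiu                           # conditional mean of f given u
    if approx == "DTC":
        return f_mean                                        # f is deterministic given u
    # FITC: independent noise with variance diag(Kff - Qff)
    A = np.linalg.solve(Luu, Kfu.T)
    var = np.clip(np.diag(cov(X, X)) - np.sum(A * A, axis=0), 0.0, np.inf)
    return f_mean + np.sqrt(var) * rng.standard_normal(len(X))
```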
    def prior(self, name, X, Xu, **kwargs):
        R"""
        Returns the GP prior distribution evaluated over the input
        locations `X` with inducing locations `Xu`.

        This is the prior probability over the space of functions
        described by its mean and covariance function.

        The DTC and FITC approximations use a simplified form of the true
        conditional :math:`p(f \mid u)`,

        .. math::

           f \mid X, X_u \sim \text{MvNormal}\left( K(X, X_u) K(X_u, X_u)^{-1} u,\; K(X, X) - Q(X, X) \right)

        where

        .. math::

           u \mid X_u \sim \text{MvNormal}\left( \mu(X_u), K(X_u, X_u) \right)

        and

        .. math::

           Q(X, X') = K(X, X_u) K(X_u, X_u)^{-1} K(X_u, X')

        The DTC approximation uses (resulting in a non-GP prior)

        .. math::

           K(X, X) - Q(X, X) \approx 0

        while the FITC approximation uses (resulting in a GP prior)

        .. math::

           K(X, X) - Q(X, X) \approx \mathrm{diag}(K(X, X) - Q(X, X))

        Parameters
        ----------
        name : string
            Name of the random variable
        X : array-like
            Function input values. If one-dimensional, must be a column
            vector with shape `(n, 1)`.
        Xu : array-like
            The inducing points. Must have the same number of columns as `X`.
        **kwargs
            Extra keyword arguments that are passed to the distribution
            constructor.
        """
        f = self._build_prior(name, X, Xu, **kwargs)
        self.X = X
        self.Xu = Xu
        self.f = f
        return f
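Numerically, the projected covariance :math:`Q` from the docstring and the two approximate prior covariances can be formed with plain NumPy; the `expquad` kernel below is a hypothetical stand-in for `pm.gp.cov.ExpQuad`, and the jitter is an assumption.

```python
import numpy as np

def expquad(A, B, ls=0.1):
    # illustrative isotropic ExpQuad kernel, not the pymc3 implementation
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2 / ls ** 2)

X = np.linspace(0, 1, 10)[:, None]
Xu = np.linspace(0, 1, 5)[:, None]
Kuu = expquad(Xu, Xu) + 1e-6 * np.eye(5)   # jittered for stability
Kfu = expquad(X, Xu)
K = expquad(X, X)
Q = Kfu @ np.linalg.solve(Kuu, Kfu.T)      # Q(X, X) = K(X, Xu) Kuu^{-1} K(Xu, X)
dtc_cov = Q                                 # DTC: drop K - Q entirely
fitc_cov = Q + np.diag(np.clip(np.diag(K - Q), 0, np.inf))  # FITC: keep its diagonal
```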
    def _get_given_vals(self, given):
        if given is None:
            given = {}
        if 'gp' in given:
            cov_total = given['gp'].cov_func
            mean_total = given['gp'].mean_func
        else:
            cov_total = self.cov_func
            mean_total = self.mean_func
        if all(val in given for val in ['X', 'f', 'Xu']):
            X, Xu, f = given['X'], given['Xu'], given['f']
        else:
            X, Xu, f = self.X, self.Xu, self.f
        return X, Xu, f, cov_total, mean_total
    def _build_conditional(self, Xnew, X, Xu, f, cov_total, mean_total):
        Kuu = cov_total(Xu)
        Luu = cholesky(stabilize(Kuu))

        Kuf = cov_total(Xu, X)
        Kuffu = tt.dot(Kuf, Kuf.T)
        Luffu = cholesky(stabilize(Kuffu))
        Ksu = self.cov_func(Xnew, Xu)
        r = f - mean_total(X)  # the equations are derived for zero-mean f
        Kuuiu = invert_dot(Luffu, tt.dot(Kuf, r))
        mus = self.mean_func(Xnew) + tt.dot(Ksu, Kuuiu)
        if self.approx == 'FITC':
            Lambda = self.cov_func.diag(X) - project_inverse(Kuf.T, Luu, diag=True)
            Qsf = project_inverse(Ksu, Luu, P_T=Kuf)
            mus += tt.dot(Qsf, f / Lambda)
        Qss = project_inverse(Ksu, Luu)
        Kss = self.cov_func(Xnew)
        cov = tt.clip(Kss - Qss, 0, np.inf)
        if self.approx == 'FITC':
            cov -= tt.dot(Qsf, tt.transpose(Qsf / Lambda))
        return mus, cov
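As a cross-check, here is a dense NumPy version of the same conditional. This is a sketch: the helper and its callable `cov`/`mean` arguments are hypothetical, and the jitter values are assumptions.

```python
import numpy as np

def sparse_conditional_sketch(cov, mean, Xnew, X, Xu, f, approx="FITC"):
    m = len(Xu)
    Luu = np.linalg.cholesky(cov(Xu, Xu) + 1e-6 * np.eye(m))
    Kuf = cov(Xu, X)
    Luffu = np.linalg.cholesky(Kuf @ Kuf.T + 1e-6 * np.eye(m))
    Ksu = cov(Xnew, Xu)
    r = f - mean(X)                                  # equations assume zero-mean f
    Kuuiu = np.linalg.solve(Luffu.T, np.linalg.solve(Luffu, Kuf @ r))
    mus = mean(Xnew) + Ksu @ Kuuiu
    A = np.linalg.solve(Luu, Ksu.T)                  # gives Qss = Ksu Kuu^{-1} Kus
    cov_ss = np.clip(cov(Xnew, Xnew) - A.T @ A, 0, np.inf)
    if approx == "FITC":
        B = np.linalg.solve(Luu, Kuf)
        Lam = np.diag(cov(X, X)) - np.sum(B * B, axis=0)  # FITC diagonal
        Qsf = A.T @ B                                # Ksu Kuu^{-1} Kuf
        mus += Qsf @ (f / Lam)
        cov_ss -= Qsf @ (Qsf / Lam).T
    return mus, cov_ss
```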
    def conditional(self, name, Xnew, given=None, **kwargs):
        R"""
        Returns the approximate conditional distribution evaluated
        over new input locations `Xnew`.

        Parameters
        ----------
        name : string
            Name of the random variable
        Xnew : array-like
            Function input values.
        given : dict
            Can optionally take as key value pairs: `X`, `Xu`, `f`,
            and `gp`. See the section in the documentation on additive GP
            models in PyMC3 for more information.
        **kwargs
            Extra keyword arguments that are passed to the `MvNormal`
            distribution constructor.
        """
        givens = self._get_given_vals(given)
        mu, cov = self._build_conditional(Xnew, *givens)
        chol = cholesky(stabilize(cov))
        shape = infer_shape(Xnew, kwargs.pop("shape", None))
        return pm.MvNormal(name, mu=mu, chol=chol, shape=shape, **kwargs)

@conditioned_vars(["X", "f", "nu"])
class TP(Latent):
    """
@@ -9,6 +9,24 @@
 solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
 solve = tt.slinalg.Solve(A_structure='general')
def invert_dot(L, X):
    """Wrapper for the common pattern K^{-1} @ X where K = L @ L^T."""
    return solve_upper(L.T, solve_lower(L, X))


def project_inverse(P, L, diag=False, P_T=None):
    """Wrapper for the common pattern P @ K^{-1} @ P^T where K = L @ L^T."""
    symmetric = P_T is None
    if symmetric:
        P_T = P.T
    A = solve_lower(L, P_T)
    if diag:
        # the diagonal of A.T @ A; assumes the symmetric case (P_T is None)
        return tt.sum(A * A, axis=0)
    else:
        if symmetric:
            return tt.dot(A.T, A)
        else:
            return tt.dot(P, invert_dot(L, P_T))

Review comment on these helpers: "These are great! For consistency though, it would be good to use these functions throughout the gp source code (I know, a pain) or just write the theano ops as before."
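These helpers can be sanity-checked against dense linear algebra; below is a small NumPy sketch where all variable names are illustrative.

```python
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
K = A @ A.T + 5 * np.eye(5)              # a symmetric positive definite K
L = np.linalg.cholesky(K)
X = rng.standard_normal((5, 3))
P = rng.standard_normal((4, 5))

# invert_dot(L, X) computes K^{-1} @ X via two triangular solves
x = np.linalg.solve(L.T, np.linalg.solve(L, X))
assert np.allclose(x, np.linalg.solve(K, X))

# project_inverse(P, L) computes P @ K^{-1} @ P^T
B = np.linalg.solve(L, P.T)
assert np.allclose(B.T @ B, P @ np.linalg.solve(K, P.T))
# with diag=True only the diagonal is formed, avoiding the full product
assert np.allclose((B * B).sum(axis=0), np.diag(P @ np.linalg.solve(K, P.T)))
```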

def infer_shape(X, n_points=None):
    if n_points is None:
@@ -683,6 +683,72 @@ def testLatent2(self):
        npt.assert_allclose(latent_logp, self.logp, atol=5)
class TestLatentVsLatentSparse(object):
    R"""
    Compare logp of models Latent and LatentSparse.
    Should be nearly equal when the inducing points are the same as the inputs.
    """
    def setup_method(self):
        X = np.random.randn(50, 3)
        y = np.random.randn(50) * 0.01
        Xnew = np.random.randn(60, 3)
        pnew = np.random.randn(60) * 0.01
        with pm.Model() as model:
            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
            mean_func = pm.gp.mean.Constant(0.5)
            gp = pm.gp.Latent(mean_func, cov_func)
            f = gp.prior("f", X)
        chol = np.linalg.cholesky(cov_func(X).eval())
        y_rotated = np.linalg.solve(chol, y - mean_func(X).eval())
        logp_params = {'f_rotated_': y_rotated}
        self.logp_prior = model.logp(logp_params)
        with model:
            p = gp.conditional("p", Xnew)
        logp_params['p'] = pnew
        self.logp_conditional = model.logp(logp_params)
        self.X = X
        self.Xnew = Xnew
        self.y = y
        self.pnew = pnew
        self.gp = gp

Review comment: "To make this test stable, could use a covariance function that has a white noise term."
    @pytest.mark.parametrize('approx', ['FITC', 'DTC'])
    def testPriorApproximations(self, approx):
        with pm.Model() as model:
            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
            mean_func = pm.gp.mean.Constant(0.5)
            gp = pm.gp.LatentSparse(mean_func, cov_func, approx=approx)
            f = gp.prior("f", self.X, self.X)
        chol = np.linalg.cholesky(cov_func(self.X).eval())
        y_rotated = np.linalg.solve(chol, self.y - mean_func(self.X).eval())
        model_params = {
            "f_u_rotated_": y_rotated,
        }
        if approx == 'FITC':
            # f must be specified as well since f ~ Normal(f_, diag(Kff - Qff))
            model_params['f'] = self.y
        approx_logp = model.logp(model_params)
        npt.assert_allclose(approx_logp, self.logp_prior, atol=0, rtol=1e-2)
    @pytest.mark.parametrize('approx', ['FITC', 'DTC'])
    def testConditionalApproximations(self, approx):
        with pm.Model() as model:
            cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
            mean_func = pm.gp.mean.Constant(0.5)
            gp = pm.gp.LatentSparse(mean_func, cov_func, approx=approx)
            f = gp.prior("f", self.X, self.X)
            p = gp.conditional("p", self.Xnew)
        chol = np.linalg.cholesky(cov_func(self.X).eval())
        y_rotated = np.linalg.solve(chol, self.y - mean_func(self.X).eval())
        model_params = {
            "f_u_rotated_": y_rotated,
            "p": self.pnew
        }
        if approx == 'FITC':
            # f must be specified as well since f ~ Normal(f_, diag(Kff - Qff))
            model_params['f'] = self.y
        approx_logp = model.logp(model_params)
        npt.assert_allclose(approx_logp, self.logp_conditional, atol=0, rtol=5e-2)

class TestMarginalVsMarginalSparse(object):
    R"""
    Compare logp of models Marginal and MarginalSparse.
Review comment: all of them must implement the `diag` method. I think there's no need to specify this.

Reply: Perhaps I should reformulate this: the `cov_func` has to implement a computationally efficient `diag` method (i.e. not use `tt.diagonal(full(X, X))`) for the approximation to be sparse.
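To illustrate the point about `diag`: for the approximation to stay sparse, the diagonal should be computed directly in O(n) rather than by materializing the full n×n matrix. A hypothetical sketch, not the pymc3 implementation:

```python
import theano.tensor as tt

class ExpQuadSketch:
    """Illustrative stationary kernel; only the diag method matters here."""
    def __init__(self, variance=1.0, ls=0.1):
        self.variance, self.ls = variance, ls

    def full(self, X, Xs):
        d2 = tt.sum((X[:, None, :] - Xs[None, :, :]) ** 2, axis=-1)
        return self.variance * tt.exp(-0.5 * d2 / self.ls ** 2)

    def diag(self, X):
        # O(n): a stationary kernel is constant on the diagonal
        return self.variance * tt.ones((X.shape[0],))
        # naive alternative: tt.diagonal(self.full(X, X)) builds the full
        # n x n matrix first, defeating the purpose of the approximation
```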