Efficient sampling from mixture models #547
Changes from 11 commits
@@ -13,31 +13,68 @@
                         zeros_like, ones, ones_like,
                         concatenate, constant, argmax)

import theano

from numpy import pi, inf, nan
from .special import gammaln, multigammaln

from numpy import pi, inf, nan, float64, array
from .special import gammaln, multigammaln
from theano.ifelse import ifelse
from theano.printing import Print
from .distribution import *

def guess_is_scalar(c):
    bcast = None
    if (isinstance(c, t.TensorType)):
        return c.broadcastable == ()

    if (isinstance(c, t.Variable) or isinstance(c, t.Constant)):
        return c.type.broadcastable == ()

    tt = t.as_tensor_variable(c)
    return tt.type.broadcastable == ()

def bound(logp, *conditions):
impossible = constant(-inf, dtype=float64) # We re-use that constant

def bound_scalar(logp, *conditions):
Review comment: Could we remove

Review comment (reply): I tried to write a generic version of bound that can deal with all the kinds of different inputs that bound is actually fed with. A notable variant: logp can be a Theano scalar or a Theano vector (in practice, not by contract). In the case that logp is a vector, I tried to write a general version which used scan and theano.ifelse to handle every case correctly. It's hard to exactly mimic theano.switch behaviour using scan and ifelse, though, so in order not to introduce any new bugs, I fell back to the old solution using switch, except for the case where I know logp to be a scalar, which is the only specific case where I knew using switch introduced a bug.

Review comment (reply): theano.switch evaluates both alternatives it switches between, regardless of the conditions, unlike ifelse, which evaluates only the expression it actually returns. Consequently, the case that the switch-based bound doesn't handle correctly is when it is fed a logp that raises an exception if one of the conditions is violated. This can happen in Categorical.logp, since the logp is created by looking up an index in a weights vector: if the index is out of bounds, an exception gets raised. This should be prevented by the conditions passed to bound (which check whether the index is out of bounds), but since switch evaluates the (invalid) logp anyway, an exception gets thrown. Apart from that, since switch evaluates dead expression branches, it's also very inefficient.

Review comment: I'm fine with this, since most users will be using
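Editor's note: a minimal sketch (not from this pull request) of the switch vs. ifelse behaviour described above, assuming a plain Theano setup; the variable names are illustrative only.

import numpy as np
import theano
import theano.tensor as t
from theano.ifelse import ifelse

weights = t.vector('weights')
idx = t.iscalar('idx')
in_bounds = t.lt(idx, weights.shape[0])

# switch builds a graph in which both branches are computed before selecting.
f_switch = theano.function([weights, idx],
                           t.switch(in_bounds, weights[idx], np.float64(-np.inf)))

# ifelse is lazy: only the branch that is actually returned gets evaluated.
f_ifelse = theano.function([weights, idx],
                           ifelse(in_bounds, weights[idx], np.float64(-np.inf)))

w = np.array([0.2, 0.8])
# f_switch(w, 5) may raise an IndexError even though the condition is False,
# while f_ifelse(w, 5) should return -inf because the invalid lookup is never evaluated.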

    """
    Bounds a log probability density with several conditions

    Parameters
    ----------
    logp : float
    *conditionss : booleans
    *conditions : booleans

    Returns
    -------
    logp if all conditions are true
    -inf if some are false
    """
    cond = alltrue(conditions)
    return ifelse(cond, t.cast(logp, dtype='float64'), impossible)


def bound(logp, *conditions):
    """
    Bounds a log probability density with several conditions

    return switch(alltrue(conditions), logp, -inf)
    Parameters
    ----------
    logp : float
    *conditionss : booleans

    Returns
    -------
    logp if all conditions are true
    -inf if some are false
    """
    if (guess_is_scalar(logp)):
        for c in conditions:
            if (type(c)==bool):
                continue
            if (not guess_is_scalar(c)):
                break
        return bound_scalar(logp, *conditions)
    return switch(alltrue(conditions), logp, impossible)

def alltrue(vals):
    ret = 1
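Editor's note, not part of the diff: for orientation, this is roughly how a distribution's logp method calls bound(); the conditions encode parameter constraints, and a violated condition maps the log density to -inf. The sketch assumes the names imported at the top of this module.

# Hedged sketch of a typical caller of bound(); illustrative only.
def exponential_logp(value, lam):
    # log p(value | lam) = log(lam) - lam * value, valid for value >= 0 and lam > 0
    return bound(log(lam) - lam * value,
                 value >= 0,
                 lam > 0)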
@@ -52,6 +89,11 @@ def logpow(x, m):
    """
    return switch(eq(x, 0) & eq(m, 0), 0, m * log(x))

def logpow_relaxed(x, m):
    """
    Calculates log(x**m), unless m==0 or x==0, in which case it will return 0
    """
    return switch(eq(x, 0) | eq(m, 0), 0, m * log(x))

def factln(n):
    return gammaln(n + 1)
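Editor's note, not part of the diff: the only difference between the two helpers is the boolean connective, which changes what happens when exactly one of x and m is zero.

# logpow(x, m)         returns 0 only when x == 0 AND m == 0, otherwise m * log(x)
# logpow_relaxed(x, m) returns 0 when x == 0 OR  m == 0, otherwise m * log(x)
# e.g. for x = 0, m = 2:  logpow -> 2 * log(0) = -inf,  logpow_relaxed -> 0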
@@ -0,0 +1,117 @@

from pymc import *

from numpy.random import normal
import numpy as np
import pylab as pl
from itertools import product
import theano
import theano.tensor as T
from theano.ifelse import ifelse
from numpy import pi
from numpy.random import uniform, multivariate_normal
from pymc.model import FreeRV, ObservedRV
import numpy as np
import numpy.linalg as np_linalg
from pymc.step_methods.metropolis_hastings import GenericProposal

"""
Multivariate Mixture distributions.

These distributions have a lot of potential uses. For one thing, they can be used to approximate any other density function,
and there are many algorithms for efficiently creating density estimates (e.g. Kernel Density Estimates (KDE),
Gaussian Mixture Model learning, and others).
Review comment: Would be good to have PEP8 compliance and not use line lengths > 80.

Once we have learned such a density, we could use it as a prior density. To do that, we need to be able to
efficiently sample from it. Luckily, that's simple and efficient, unless you use the wrong proposal distribution, of course.

That's where these classes come in handy.

@author Kai Londenberg ( [email protected] )
"""

class MvGaussianMixture(Continuous):

    def __init__(self, shape, cat_var, mus, taus, model=None, *args, **kwargs):
Review comment: I recommend using singular

        '''
        Creates a continuous mixture distribution which can be efficiently evaluated and sampled from.

        Args:
            shape: Shape of the distribution. All mixture components need to have this same shape as well.
            cat_var: Categorical variable which selects the active mixture component; its distribution's
                     weight vector p acts as the mixture weights.
            mus: list of mean vectors, one per mixture component.
            taus: list of precision matrices, one per mixture component.
        '''
        super(MvGaussianMixture, self).__init__(*args, shape=shape, **kwargs)
        assert isinstance(cat_var, FreeRV)
        assert isinstance(cat_var.distribution, Categorical)
        self.cat_var = cat_var
        self.model = modelcontext(model)
        weights = cat_var.distribution.p
        self.weights = weights
        self.mus = mus
        self.taus = taus
        self.mu_t = T.stacklists(mus)
        self.tau_t = T.stacklists(taus)
        self.shape = shape
        self.testval = np.zeros(self.shape, self.dtype)
        self.last_cov_value = {}
        self.last_tau_value = {}
        self.param_fn = None

    def logp(self, value):
        mu = self.mu_t[self.cat_var]
        tau = self.tau_t[self.cat_var]

        delta = value - mu
        k = tau.shape[0]

        return 1/2. * (-k * log(2*pi) + log(det(tau)) - dot(delta.T, dot(tau, delta)))
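Editor's note, not part of the diff: the return expression above is the standard multivariate normal log density, written with the precision matrix of the component selected by cat_var:

    \log p(x \mid \mu_c, \tau_c) = \tfrac{1}{2}\left(-k \log(2\pi) + \log\lvert\tau_c\rvert - (x - \mu_c)^{\top} \tau_c (x - \mu_c)\right)

where k is the dimensionality and \tau_c is the precision (inverse covariance) matrix of component c.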
    def draw_mvn(self, mu, cov):

        return np.random.multivariate_normal(mean=mu, cov=cov)

    def draw(self, point):
Review comment: Here and above, let's use

        if (self.param_fn is None):
            self.param_fn = self.model.fastfn([self.cat_var, self.mu_t[self.cat_var], self.tau_t[self.cat_var]])

        cat, mu, tau = self.param_fn(point)
        # Cache cov = inv(tau) for each value of cat
        cat = int(cat)
        last_tau = self.last_tau_value.get(cat, None)
        if (last_tau is not None and np.allclose(tau, last_tau)):
            mcov = self.last_cov_value[cat]
        else:
            mcov = np_linalg.inv(tau)
            self.last_cov_value[cat] = mcov
            self.last_tau_value[cat] = tau

        return self.draw_mvn(mu, mcov)

_impossible = float('-inf')
Review comment: Maybe call this

class GMMProposal(GenericProposal):
Review comment: Do you define GMM anywhere?

    def __init__(self, gmm_var, model=None):
        super(GMMProposal, self).__init__([gmm_var])
        assert isinstance(gmm_var.distribution, MvGaussianMixture)
        model = modelcontext(model)
        self.gmm_var = gmm_var
        self.logpfunc = model.fastfn(gmm_var.distribution.logp(gmm_var))
        self._proposal_logp_difference = 0.0


    def propose_move(self, from_point, point):
        old_logp = self.logpfunc(from_point)
        point[self.gmm_var.name] = self.gmm_var.distribution.draw(point)
        new_logp = self.logpfunc(point)
        if (not np.isnan(old_logp) and old_logp != _impossible):
            self._proposal_logp_difference = old_logp - new_logp
        else:
            self._proposal_logp_difference = 0.0
        return point

    def proposal_logp_difference(self):
        return self._proposal_logp_difference
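Editor's note, not part of the diff: propose_move draws the new value from the mixture density itself (an independence proposal), so the Metropolis-Hastings acceptance probability needs the usual asymmetry correction, which is what proposal_logp_difference() supplies:

    \log \alpha = \min\left(0,\; \log p(x') - \log p(x) + \log q(x) - \log q(x')\right)

where q is the mixture density used as the proposal; since it does not depend on the current point, q(x \mid x') = q(x), and the last two terms are exactly old_logp - new_logp as computed above.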
@@ -0,0 +1,135 @@
import numpy as np
from pymc import *

from pymc.step_methods.metropolis_hastings import *
from pymc.distributions.mixtures import *
from pymc.distributions.special import gammaln
from pymc.distributions.dist_math import *

import theano
import theano.tensor as T
from theano.ifelse import ifelse as theano_ifelse
import pandas
import matplotlib.pylab as plt
import numpy.linalg

theano.config.mode = 'FAST_COMPILE'
theano.config.compute_test_value = 'raise'

'''
Example which demonstrates the use of the MvGaussianMixture as a flexible (i.e. arbitrary) prior,
and the usage of the MetropolisHastings step method with a custom proposal.
'''

# Some helper methods
def is_pos_def(x):
    return np.all(np.linalg.eigvals(x) > 0)

def tmin(a, b):
    return T.switch(T.lt(a, b), a, b)

def tmax(a, b):
    return T.switch(T.gt(a, b), a, b)

''' We construct a number of valid
parameters for the multivariate normal.

In order to ensure that the covariance matrices are positive semidefinite, they are raised to the (matrix) power of two.
Precision matrices are obtained by inverting the covariance matrices.
'''
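Editor's note, not part of the diff: squaring works here because each covariance matrix constructed below is symmetric, so

    A^2 = A^{\top} A, \qquad x^{\top} A^{\top} A x = \lVert A x \rVert^2 \ge 0 \;\; \forall x,

hence A^2 is positive semidefinite (and positive definite whenever A is nonsingular).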
def create_model(with_observations=False):

    # First, we create some parameters for a Multivariate Normal
    mu0 = np.ones((2), dtype=np.float64)
    cov0 = np.eye(2, dtype=np.float64) * 0.2
    tau0 = numpy.linalg.inv(cov0)

    mu1 = mu0+5.5
    cov1 = cov0.copy() * 1.4
    cov1[0,1] = 0.8
    cov1[1,0] = 0.8
    cov1 = np.linalg.matrix_power(cov1,2)
    tau1 = numpy.linalg.inv(cov1)

    mu2 = mu0-2.5
    cov2 = cov0.copy() * 1.4
    cov2[0,1] = 0.8
    cov2[1,0] = 0.8
    cov2 = np.linalg.matrix_power(cov2,2)
    tau2 = numpy.linalg.inv(cov2)

    mu3 = mu0*-0.5 + 2.0*mu2
    cov3 = cov0.copy() * 0.9
    cov3[0,1] = -0.8
    cov3[1,0] = -0.8
    cov3 = np.linalg.matrix_power(cov3,2)
    tau3 = numpy.linalg.inv(cov3)

    m2 = np.array([3.0])
    aval = np.array([4, 3, 2., 1.])
    a = constant(aval)

    model = Model()

    with model:
        k = 4
        p, p_m1 = model.TransformedVar(
            'p', Dirichlet.dist(a, shape=k),
            simplextransform)

        c = Categorical('c', p)
        gmm = MvGaussianMixture('gmm', shape=mu0.shape, cat_var=c, mus=[mu0,mu1,mu2,mu3], taus=[tau0, tau1,tau2,tau3])
        ' Now, we clamp these arbitrary values into [0, 1]. What do we get? A nice probability which we can, in turn, sample from (or observe) '
        fac = T.constant( np.array([10.0], dtype=np.float64))
        gmm_p = Deterministic('gmm_p', (tmax(tmin(gmm[0], fac) / fac, T.constant(np.array([0.0], dtype=np.float64)))))
        pbeta = Beta('pbeta', 1,1)
        if (with_observations):
            result = Bernoulli('result', p=gmm_p[0], observed=np.array([1,0,1], dtype=np.bool))
        else:
            result = Bernoulli('result', p=gmm_p[0])
        ' Try this with a Metropolis instance, and watch it fail ..'
        step = MetropolisHastings(vars=model.vars, proposals=[GMMProposal(gmm)])
    return model, step
def plot_scatter_matrix(title, tr, fig=None):
    if (fig is None):
        fig = plt.Figure()
    t6 = pandas.Series(tr['c'])
    t8 = pandas.Series(tr['gmm'][:,0])
    t9 = pandas.Series(tr['gmm'][:,1])
    t10 = pandas.Series(tr['gmm_p'][:,0])
    t11 = pandas.Series(tr['pbeta'])
    df = pandas.DataFrame({'cat' : t6, 'gmm_0' : t8, 'gmm_1' : t9, 'p' : t10, 'pbeta' : t11})
    pandas.scatter_matrix(df)
    plt.title(title)
    return fig

def plot_p_prior_vs_posterior(prior_trace, posterior_trace, fig=None):
    if (fig is None):
        fig = plt.Figure()
    pr = pandas.Series(prior_trace['gmm_p'][:,0])
    po = pandas.Series(posterior_trace['gmm_p'][:,0])
    plt.hist(pr, bins=40, range=(-0.1, 1.1), color='b', alpha=0.5, label='Prior')
    plt.hist(po, bins=40, range=(-0.1, 1.1), color='g', alpha=0.5, label='Posterior')
    plt.legend()
    plt.show()

def run(n=10000):
    m1, step1 = create_model(False)
    m2, step2 = create_model(True)
    with m1:
        trace1 = sample(n, step1)
    with m2:
        trace2 = sample(n, step2)
    fig1 = plot_scatter_matrix('Prior Scatter Matrix', trace1)
    plt.show()
    fig2 = plot_scatter_matrix('Posterior Scatter Matrix', trace2)
    plt.show()
    plot_p_prior_vs_posterior(trace1, trace2)
    plt.show()

if __name__ == '__main__':
    run()
    print "Done"
Review comment: Do you need the parts above? Won't just
t.as_tensor_variable(c).type.broadcastable == ()
do everything you need?

Review comment (reply): Hi,
I think I tried something like that, and it failed in some example or test
case. Don't remember the details, though.
Kai
2014-06-10 9:08 GMT+02:00 John Salvatier [email protected]: