diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 1c7c4c2537..55b6fec986 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -8,6 +8,7 @@ from __future__ import division from .dist_math import * +import numpy from numpy.random import uniform as runiform, normal as rnormal __all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace', @@ -54,7 +55,7 @@ def logp(self, value): upper = self.upper return bound( - -log(upper - lower), + -log(upper - lower), # Inserted cast to prevent exception in theano.ifelse.ifelse lower <= value, value <= upper) def random(self, size=None): diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index fa78baa11f..bdc517650f 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -285,7 +285,7 @@ def logp(self, value): p = self.p k = self.k - return bound(log(p[value]), + return bound_scalar(t.sum(log(p[value])), value >= 0, value <= (k - 1), le(abs(sum(p) - 1), 1e-5)) diff --git a/pymc/distributions/dist_math.py b/pymc/distributions/dist_math.py index 08080e5572..f77ab899ed 100644 --- a/pymc/distributions/dist_math.py +++ b/pymc/distributions/dist_math.py @@ -13,31 +13,68 @@ zeros_like, ones, ones_like, concatenate, constant, argmax) +import theano -from numpy import pi, inf, nan -from .special import gammaln, multigammaln +from numpy import pi, inf, nan, float64, array +from .special import gammaln, multigammaln +from theano.ifelse import ifelse from theano.printing import Print from .distribution import * +def guess_is_scalar(c): + bcast = None + if (isinstance(c, t.TensorType)): + return c.broadcastable == () + + if (isinstance(c, t.Variable) or isinstance(c, t.Constant)): + return c.type.broadcastable == () + + tt = t.as_tensor_variable(c) + return tt.type.broadcastable == () -def bound(logp, *conditions): +impossible = constant(-inf, dtype=float64) # We re-use that constant + +def bound_scalar(logp, *conditions): """ Bounds a log probability density with several conditions Parameters ---------- logp : float - *conditionss : booleans + *conditions : booleans Returns ------- logp if all conditions are true -inf if some are false + """ + cond = alltrue(conditions) + return ifelse(cond, t.cast(logp, dtype='float64'), impossible) + + +def bound(logp, *conditions): """ + Bounds a log probability density with several conditions - return switch(alltrue(conditions), logp, -inf) + Parameters + ---------- + logp : float + *conditionss : booleans + Returns + ------- + logp if all conditions are true + -inf if some are false + """ + if (guess_is_scalar(logp)): + for c in conditions: + if (type(c)==bool): + continue + if (not guess_is_scalar(c)): + break + return bound_scalar(logp, *conditions) + return switch(alltrue(conditions), logp, impossible) def alltrue(vals): ret = 1 @@ -52,6 +89,11 @@ def logpow(x, m): """ return switch(eq(x, 0) & eq(m, 0), 0, m * log(x)) +def logpow_relaxed(x, m): + """ + Calculates log(x**m), unless m==0 or x==0 in which case it will return 0 + """ + return switch(eq(x, 0) | eq(m, 0), 0, m * log(x)) def factln(n): return gammaln(n + 1) diff --git a/pymc/distributions/mixtures.py b/pymc/distributions/mixtures.py new file mode 100644 index 0000000000..a8303607b0 --- /dev/null +++ b/pymc/distributions/mixtures.py @@ -0,0 +1,117 @@ + + +from pymc import * + +from numpy.random import normal +import numpy as np +import pylab as pl +from itertools import product +import theano +import theano.tensor as T +from 
theano.ifelse import ifelse +from numpy import pi +from numpy.random import uniform, multivariate_normal +from pymc.model import FreeRV, ObservedRV +import numpy as np +import numpy.linalg as np_linalg +from pymc.step_methods.metropolis_hastings import GenericProposal + +""" +Multivariate Mixture distributions. + +These distributions have a lot of potential uses. For one thing: They can be used to approximate every other density function, +and there are a lot of algorithms for efficiently creating density estimates (i.e. Kernel Density Estimates (KDE), +Gaussian Mixture Model learning and others. + +Once we have learned such a density, we could use it as a prior density. To do that, we need to be able to +efficiently sample from it. Luckily, that's simple and efficient. Unless you use the wrong proposal distribution, of course. + +That's where these classes come in handy. + +@author Kai Londenberg ( Kai.Londenberg@gmail.com ) + +""" +class MvGaussianMixture(Continuous): + + def __init__(self, shape, cat_var, mus, taus, model=None, *args, **kwargs): + ''' + Creates a continuous mixture distribution which can be efficiently evaluated and sampled from. + + Args: + shape: Shape of the distribution. All kernels need to have this same shape as well. + weights: weight vector (may be a theano shared variable or expression. Must be able to evaluate this in the context of the model). + kernels: list of mixture component distributions. These have to be MixtureKernel instances (for example MvNormalKernel ) of same shape + testval: Test value for logp computations + ''' + super(MvGaussianMixture, self).__init__(*args, shape=shape, **kwargs) + assert isinstance(cat_var, FreeRV) + assert isinstance(cat_var.distribution, Categorical) + self.cat_var = cat_var + self.model = modelcontext(model) + weights = cat_var.distribution.p + self.weights = weights + self.mus = mus + self.taus = taus + self.mu_t = T.stacklists(mus) + self.tau_t = T.stacklists(taus) + self.shape = shape + self.testval = np.zeros(self.shape, self.dtype) + self.last_cov_value = {} + self.last_tau_value = {} + self.param_fn = None + + def logp(self, value): + mu = self.mu_t[self.cat_var] + tau = self.tau_t[self.cat_var] + + delta = value - mu + k = tau.shape[0] + + return 1/2. 
* (-k * log(2*pi) + log(det(tau)) - dot(delta.T, dot(tau, delta))) + + def draw_mvn(self, mu, cov): + + return np.random.multivariate_normal(mean=mu, cov=cov) + + def draw(self, point): + if (self.param_fn is None): + self.param_fn = model.fastfn([self.cat_var, self.mu_t[self.cat_var], self.tau_t[self.cat_var]]) + + cat, mu, tau = self.param_fn(point) + # Cache cov = inv(tau) for each value of cat + cat = int(cat) + last_tau = self.last_tau_value.get(cat, None) + if (last_tau is not None and np.allclose(tau,last_tau)): + mcov = self.last_cov_value[cat] + else: + mcov = np_linalg.inv(tau) + self.last_cov_value[cat] = mcov + self.last_tau_value[cat] = tau + + return self.draw_mvn(mu, mcov) + +_impossible = float('-inf') +class GMMProposal(GenericProposal): + + def __init__(self, gmm_var, model=None): + super(GMMProposal, self).__init__([gmm_var]) + assert isinstance(gmm_var.distribution, MvGaussianMixture) + model = modelcontext(model) + self.gmm_var = gmm_var + self.logpfunc = model.fastfn(gmm_var.distribution.logp(gmm_var)) + self._proposal_logp_difference = 0.0 + + + def propose_move(self, from_point, point): + old_logp = self.logpfunc(from_point) + point[self.gmm_var.name] = self.gmm_var.distribution.draw(point) + new_logp = self.logpfunc(point) + if (old_logp!=np.nan and old_logp!=_impossible): + self._proposal_logp_difference = old_logp-new_logp + else: + self._proposal_logp_difference = 0.0 + return point + + def proposal_logp_difference(self): + return self._proposal_logp_difference + diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 945e39265c..76eda5d61f 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -1,7 +1,8 @@ from .dist_math import * from theano.sandbox.linalg import det, solve, matrix_inverse, trace -from theano.tensor import dot, cast +from theano.tensor import dot, cast, gt +from theano.ifelse import ifelse from theano.printing import Print __all__ = ['MvNormal', 'Dirichlet', 'Multinomial', 'Wishart'] @@ -134,46 +135,116 @@ class Wishart(Continuous): distribution of the maximum-likelihood estimator (MLE) of the precision matrix of a multivariate normal distribution. If V=1, the distribution is identical to the chi-square distribution with n degrees of freedom. + + It is also the conjugate Prior for the Precision Matrix parameter of a + multivariate normal. + + This follows the parameterization given in formula 290 and 291 on page 24 + of [Kevin Murphy, Conjugate Bayesian Analysis of the Gaussian Distribution] + available at http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf + see that paper for further details and references. + + To create noninformative priors for covariance or precision matrices, see + Huang and Wang, "Simple Marginally Noninformative Prior Distributions + for Covariance Matrices" ( http://ba.stat.cmu.edu/journal/2013/vol08/issue02/huang.pdf ) + and Gelman, "Prior Distributions for variance parameters in hierarchical models" + ( https://faculty.washington.edu/jmiyamot/bayes/gelmana%20prior%20distributions%20f%20variance%20parameters%20i%20hierarchical%20mods.pdf ) + as well as http://www.themattsimpson.com/2012/08/20/prior-distributions-for-covariance-matrices-the-scaled-inverse-wishart-prior/ + and http://dahtah.wordpress.com/2012/03/07/why-an-inverse-wishart-prior-may-not-be-such-a-good-idea/ + for a discussion of possible problems with priors of covariance and / or precision matrices + + :Parameters: + v : int + Degrees of freedom, v > p-1 . 
+ S : ndarray + p x p positive definite matrix (scale matrix / scatter matrix). Can be interpreted as the scatter matrix of v p-variate observations. - For an alternative parameterization based on :math:`C=T{-1}` (Not yet implemented) + :Support: + X : matrix + Symmetric, positive definite. + """ + def __init__(self, v, S, *args, **kwargs): + super(Wishart, self).__init__(*args, **kwargs) + self.v = v + self.p = p = S.shape[0] + self.S = S + + self.mean = v * S + self.mode = switch(1*(v > p + 1), + (v - p - 1) * S, + nan) + 'TODO: We should pre-compute the following if the parameters are fixed' + self.invalid = theano.tensor.fill(S, nan) # Invalid result, if v
<=
0. - V : ndarray - p x p positive definite matrix + v : int + Degrees of freedom, v > p - 1 + S : ndarray + p x p positive definite matrix (scale matrix / scatter matrix). Can be interpreted as the scatter matrix of v p-variate observations. :Support: X : matrix Symmetric, positive definite. """ - def __init__(self, n, V, *args, **kwargs): + def __init__(self, v, S, *args, **kwargs): super(Wishart, self).__init__(*args, **kwargs) - self.n = n - self.p = p = V.shape[0] - self.V = V - self.mean = n * V - self.mode = switch(1*(n >= p + 1), - (n - p - 1) * V, - nan) + self.v = v + self.S = S + self.p = p = S.shape[0] + self.inv_S = matrix_inverse(S) + + 'TODO: We should pre-compute the following if the parameters are fixed' + self.invalid = theano.tensor.fill(S, nan) # Invalid result, if v
<=
(p - 1)) diff --git a/pymc/distributions/special_priors.py b/pymc/distributions/special_priors.py new file mode 100644 index 0000000000..9fcf902b2b --- /dev/null +++ b/pymc/distributions/special_priors.py @@ -0,0 +1,40 @@ + +from .multivariate import InverseWishart +from .continuous import InverseGamma +from sys import float_info +import numpy as np +from pymc.distributions.continuous import InverseGamma +from theano.sandbox.linalg.ops import diag, psd + +def huang_wang_covariance_prior(name1, name2, d, observed_scatter_matrix=None, observed_sample_size=0, model=None): + ''' + Construct a noninformative or informative prior for a mv normal covariance matrix + following Huang and Wang, "Simple Marginally Noninformative Prior Distributions + for Covariance Matrices" ( http://ba.stat.cmu.edu/journal/2013/vol08/issue02/huang.pdf ) + + If no observations are included, the resulting prior has an almost flat half-t distribution + over the variance of the variables, while efficiently having an + uniform-prior (range [-1,1] for the correlation coefficients. + + This prior does not introduce a dependency between variance and correlation strength, as happens with + a simple InverseWishart prior. + + Arguments: + name1: Name for the Inverse Wishart distribution which will be created + name2: Name for the Inverse Gamma distribution which will be created as a prior for the diagonal elements of the inv_S param of the Inverse Wishart + d: Dimensionality, i.e. number of variables to create a joint covariance prior for. + observed_scatter_matrix: d x d dimensional scatter matrix of (possibly virtual) observations to combine the noninformative prior with, to form an informative prior (or posterior). + model: (optional) the model + Returns: + A tuple consisting of the covariance prior (an InverseWishart), and the hyperprior (InverseGamma) for the + diagonal elements of the InverseWishart inv_S parameter + ''' + + A = float_info.max / 4. # Large number + d_ones = np.ones(d, dtype=np.float64) + a_hyperprior = InverseGamma(name=name2, d_ones/2., d_ones / A, model=model) + S = diag(4. 
/ a_hyperprior) + if (observed_scatter_matrix is not None): + S = S + observed_scatter_matrix + cov_prior = InverseWishart(name=name1, d+1+observed_sample_size, S, model=model) + return cov_prior, a_hyperprior \ No newline at end of file diff --git a/pymc/distributions/transforms.py b/pymc/distributions/transforms.py index 6a24589459..a1ffe9a3d5 100644 --- a/pymc/distributions/transforms.py +++ b/pymc/distributions/transforms.py @@ -34,8 +34,14 @@ def __init__(self, dist, transform, *args, **kwargs): transform : Transform args, kwargs arguments to Distribution""" - forward = transform.forward + forward = transform.forward + defaults = dist.defaults try: + # If we don't do this, we will have problems + # with invalid mode values of transformed Dirichlet variables where one of the hyperparams is exactly 1 + self.default_val = forward(dist.default()) + defaults = ['default_val'] + defaults + testval = forward(dist.testval) except TypeError: testval = dist.testval @@ -52,7 +58,7 @@ def __init__(self, dist, transform, *args, **kwargs): super(TransformedDistribution, self).__init__(v.shape.tag.test_value, v.dtype, - testval, dist.defaults, + testval, defaults, *args, **kwargs) diff --git a/pymc/examples/gmm_mixture_sampling.py b/pymc/examples/gmm_mixture_sampling.py new file mode 100644 index 0000000000..2f49aa5e30 --- /dev/null +++ b/pymc/examples/gmm_mixture_sampling.py @@ -0,0 +1,135 @@ +import numpy as np +from pymc import * + +from pymc.step_methods.metropolis_hastings import * +from pymc.distributions.mixtures import * +from pymc.distributions.special import gammaln +from pymc.distributions.dist_math import * + +import theano +import theano.tensor as T +from theano.ifelse import ifelse as theano_ifelse +import pandas +import matplotlib.pylab as plt +import numpy.linalg + +theano.config.mode = 'FAST_COMPILE' +theano.config.compute_test_value = 'raise' + +''' +Example which demonstrates the use of the MvGaussianMixture as a flexible (i.e. arbitrary) Prior +and the usage of the MetropolisHastings step method with a custom Proposal +''' + +# Some helper methods +def is_pos_def(x): + return np.all(np.linalg.eigvals(x) > 0) + +def tmin(a,b): + return T.switch(T.lt(a,b), a, b) + +def tmax(a,b): + return T.switch(T.gt(a,b), a, b) + +''' We construct a number of valid +parameters for the multivariate normal. + +In order to ensure that the covariance matrices are positive semidefinite, they are risen to the (matrix-) power of two. +Precision matrices are obtained by inverting the covariance matrices. 
+''' +def create_model(with_observations=False): + + # First, we create some parameters for a Multivariate Normal + mu0 = np.ones((2), dtype=np.float64) + cov0 = np.eye(2, dtype=np.float64) * 0.2 + tau0 = numpy.linalg.inv(cov0) + + mu1 = mu0+5.5 + cov1 = cov0.copy() * 1.4 + cov1[0,1] = 0.8 + cov1[1,0] = 0.8 + cov1 = np.linalg.matrix_power(cov1,2) + tau1 = numpy.linalg.inv(cov1) + + mu2 = mu0-2.5 + cov2 = cov0.copy() * 1.4 + cov2[0,1] = 0.8 + cov2[1,0] = 0.8 + cov2 = np.linalg.matrix_power(cov2,2) + tau2 = numpy.linalg.inv(cov2) + + mu3 = mu0*-0.5 + 2.0*mu2 + cov3 = cov0.copy() * 0.9 + cov3[0,1] = -0.8 + cov3[1,0] = -0.8 + cov3 = np.linalg.matrix_power(cov3,2) + tau3 = numpy.linalg.inv(cov3) + + m2 = np.array([3.0]) + aval = np.array([4, 3, 2., 1.]) + a = constant(aval) + + model = Model() + + with model: + k = 4 + p, p_m1 = model.TransformedVar( + 'p', Dirichlet.dist(a, shape=k), + simplextransform) + + c = Categorical('c', p) + gmm = MvGaussianMixture('gmm', shape=mu0.shape, cat_var=c, mus=[mu0,mu1,mu2,mu3], taus=[tau0, tau1,tau2,tau3]) + ' Now, we ensure these arbitrary values sum to one. What do we get ? A nice probability which we can, in turn, sample from (or observe)' + fac = T.constant( np.array([10.0], dtype=np.float64)) + gmm_p = Deterministic('gmm_p', (tmax(tmin(gmm[0], fac) / fac, T.constant(np.array([0.0], dtype=np.float64))))) + pbeta = Beta('pbeta', 1,1) + if (with_observations): + result = Bernoulli('result', p=gmm_p[0], observed=np.array([1,0,1], dtype=np.bool)) + else: + result = Bernoulli('result', p=gmm_p[0]) + ' Try this with a Metropolis instance, and watch it fail ..' + step = MetropolisHastings(vars=model.vars, proposals=[GMMProposal(gmm)]) + return model, step + +def plot_scatter_matrix(title, tr, fig=None): + if (fig is None): + fig = plt.Figure() + t6 = pandas.Series(tr['c']) + t8 = pandas.Series(tr['gmm'][:,0]) + t9 = pandas.Series(tr['gmm'][:,1]) + t10 = pandas.Series(tr['gmm_p'][:,0]) + t11 = pandas.Series(tr['pbeta']) + df = pandas.DataFrame({'cat' : t6, 'gmm_0' : t8, 'gmm_1' : t9, 'p' : t10, 'pbeta' : t11}) + pandas.scatter_matrix(df) + plt.title(title) + return fig + +def plot_p_prior_vs_posterior(prior_trace, posterior_trace, fig=None): + if (fig is None): + fig = plt.Figure() + pr = pandas.Series(prior_trace['gmm_p'][:,0]) + po = pandas.Series(posterior_trace['gmm_p'][:,0]) + plt.hist(pr, bins=40, range=(-0.1, 1.1), color='b', alpha=0.5, label='Prior') + plt.hist(po, bins=40, range=(-0.1, 1.1), color='g', alpha=0.5, label='Posterior') + plt.legend() + plt.show() + +def run(n=10000): + m1, step1 = create_model(False) + m2, step2 = create_model(True) + with m1: + trace1 = sample(n, step1) + with m2: + trace2 = sample(n, step2) + fig1 = plot_scatter_matrix('Prior Scatter Matrix', trace1) + plt.show() + fig2 = plot_scatter_matrix('Posterior Scatter Matrix', trace2) + plt.show() + plot_p_prior_vs_posterior(trace1, trace2) + plt.show() + +if __name__ == '__main__': + run() + print "Done" + + diff --git a/pymc/examples/mh_example.py b/pymc/examples/mh_example.py new file mode 100644 index 0000000000..f04676c2a6 --- /dev/null +++ b/pymc/examples/mh_example.py @@ -0,0 +1,43 @@ +import numpy as np +from pymc import * +from pymc.step_methods.metropolis_hastings import * +import theano +import theano.tensor as T +import pandas +import matplotlib.pylab as plt +#theano.config.mode = 'DebugMode' + +model = Model() +with model: + + k = 5 + a = constant(np.array([2, 6., 4, 2, 2])) + pa = a / T.sum(a) + + p, p_m1 = model.TransformedVar( + 'p', Dirichlet.dist(a, shape=k), + 
simplextransform) + + c = Categorical('c', p) + +def run(n=50000): + if n == "short": + n = 50 + with model: + ' Try this with a Metropolis instance, and watch it fail ..' + step = MetropolisHastings() + trace = sample(n, step) + return trace +if __name__ == '__main__': + tr = run() + t1 = pandas.Series(tr['p'][:,0]) + t2 = pandas.Series(tr['p'][:,1]) + t3 = pandas.Series(tr['p'][:,2]) + t4 = pandas.Series(tr['p'][:,3]) + t5 = pandas.Series(tr['p'][:,4]) + t6 = pandas.Series(tr['c']) + df = pandas.DataFrame({'a' : t1,'b' : t2, 'c' : t3, 'd' : t4, 'cat' : t6}) + pandas.scatter_matrix(df) + plt.show() + + diff --git a/pymc/step_methods/metropolis_hastings.py b/pymc/step_methods/metropolis_hastings.py new file mode 100644 index 0000000000..8623dc0b23 --- /dev/null +++ b/pymc/step_methods/metropolis_hastings.py @@ -0,0 +1,282 @@ +from numpy.linalg import cholesky + +from ..core import * +from ..model import FreeRV +from .quadpotential import quad_potential + +from .arraystep import * +from numpy.random import normal, standard_cauchy, standard_exponential, poisson, random, uniform +from numpy import round, exp, copy, where, log, isfinite +import numpy as np +from ..distributions.discrete import Categorical +import theano.tensor as T +import theano +from metropolis import * + +__all__ = ['MetropolisHastings', 'GenericProposal', 'ProposalAdapter'] + +''' +More general version of Metropolis, which allows to combine different proposal distributions for +different sets of variables and then do a single accept / reject. + +Can also handle asymmetric proposals, i.e. it's an implementation of the Metropolis Hastings +algorithm, not just the Metropolis algorithm. + +The main purpose of this class is to allow for tailored proposal distributions for specialized +applications, such as efficiently sampling from a mixture model. + +@author Kai Londenberg (Kai.Londenberg@gmail.com) +''' + +class GenericProposal(object): + + def __init__(self, vars): + ''' + Constructor for GenericProposal. To be called by subclass constructors. + Arguments: + vars: List of variables (FreeRV instances) this GenericProposal is responsible for. + ''' + for v in vars: + assert isinstance(v, FreeRV) + self.vars = vars + + ''' + Asymmetric proposal distribution. + Has to be able to calculate move propabilities + ''' + def proposal_logp_difference(self): + ''' Return log-probability difference of move from one point to another + for the last proposed move + + Returns 0.0 by default, which is true for all symmetric proposal distributions + ''' + return 0.0 + + def propose_move(self, from_point, point): + ''' + Propose a move: Return a new point proposal + Destructively writes to the given point, destroying the old values in it + Arguments: + old_point: State before proposals. Calculate previous logp with respect to this point + point: New proposal. Update it, and calculate new logp with respect to the updated version of this point. 
+ ''' + raise NotImplementedError("Called abstract method") + + def tune_stepsize(self, stepsize_factor=1.0): + ''' + Tune proposal distribution to increase / decrease the stepsize of the proposals by the given factor (if possible) + ''' + pass + + +class ProposalAdapter(GenericProposal): + + def __init__(self, vars, proposal_dist=NormalProposal, scale=None): + super(ProposalAdapter, self).__init__(vars) + self.ordering = ArrayOrdering(vars) + if scale is None: + scale = np.ones(sum(v.dsize for v in vars)) + self.proposal = proposal_dist(scale) + self.discrete = np.array([v.dtype in discrete_types for v in vars]) + self.stepsize_factor = 1.0 + + + def propose_move(self, from_point, point): + i = 0 + delta = np.atleast_1d( self.proposal() * self.stepsize_factor) + for var, slc, shp in self.ordering.vmap: + dvar = delta[slc] + if self.discrete[i]: + dvar = round(delta[slc], 0).astype(int) + point[var] = point[var] + np.reshape(dvar, shp) + i += 1 + return point + + def tune_stepsize(self, stepsize_factor=1.0): + self.stepsize_factor = stepsize_factor + +class CategoricalProposal(GenericProposal): + + def __init__(self, vars, model=None): + super(CategoricalProposal, self).__init__(vars) + model = modelcontext(model) + self.vars = vars + varidx = T.iscalar('varidx') + logpt_elems = [] + self.paramfuncs = [] + self._proposal_logp_difference = 0.0 + + for i, v in enumerate(vars): + assert isinstance(v.distribution, Categorical) + self.paramfuncs.append(model.fastfn(T.as_tensor_variable(v.distribution.p))) + logpt_elems.append(v.distribution.logp(v)) + + self.logpfunc = model.fastfn(T.add(*logpt_elems)) + + def propose_move(self, from_point, point): + move_logp = 0.0 + for i, v in enumerate(self.vars): + weights = self.paramfuncs[i](point) + oldvalue = point[v.name] + newvalue = np.random.choice(len(weights),1, p=weights)[0] + 'Move probability: Probability of moving from new state to old state divided by probability of moving from old state to new state' + if (oldvalue!=newvalue): + move_logp += log(weights[oldvalue]) - log(weights[newvalue]) + point[v.name] = newvalue + + self._proposal_logp_difference = move_logp + return point + + def proposal_logp_difference(self): + return self._proposal_logp_difference + + +class MetropolisHastings(object): + + """ + Metropolis-Hastings sampling step + + Parameters + ---------- + vars : list + List of variables for sampler. + proposals : array of GenericProposal instances. For variables not covered by these, but present in "vars" list above, the constructor will choose Proposal distributions automatically + scaling : scalar or array + Initial scale factor for proposal. Defaults to 1. + tune : bool + Flag for tuning. Defaults to True. + model : PyMC Model + Optional model for sampling step. Defaults to None (taken from context). + + """ + def __init__(self, vars=None, proposals=None, scaling=1., tune=True, tune_interval=100, model=None): + model = modelcontext(model) + self.model = model + if ((vars is None) and (proposals is None)): + vars = model.vars + + covered_vars = set() + if (proposals is not None): + for p in proposals: + for v in p.vars: + covered_vars.add(v) + if (vars is not None): + vset = set(vars) | set(covered_vars) + else: + vset = set(covered_vars) + self.vars = list(vset) + remaining_vars = vset-covered_vars + if (proposals is None): + proposals = [] + if (len(remaining_vars)>0): + cvars = [] + ovars = [] + for v in vars: # We don't iterate over remaining vars, because the ordering of the vars might be important somehow.. 
+ if (v in covered_vars): + continue + if (isinstance(v.distribution, Categorical)): + cvars.append(v) + else: + ovars.append(v) + # Make sure we sample the categorical ones first. Others might switch their regime accordingly .. + if (len(cvars)>0): + proposals = [ CategoricalProposal(cvars)] + proposals + if (len(ovars)>0): + proposals.append(ProposalAdapter(ovars, NormalProposal)) + self.proposals = proposals + vars = self.vars + for p in proposals: + p.tune_stepsize(scaling) + + self.scaling = scaling + self.tune = tune + self.tune_interval = tune_interval + self.steps_until_tune = tune_interval + self.accepted = 0 + self.fastlogp = self.model.fastlogp + self.oldlogp = None + self.acceptance_rate = [] + + def step(self, point): + self._ensure_tuned() + proposal = point.copy() + proposal_symmetry = 0.0 + for prop in self.proposals: + proposal = prop.propose_move(point, proposal) + proposal_symmetry += prop.proposal_logp_difference() + + + # Log Probability of old state + if (self.oldlogp is None): + self.oldlogp = self.fastlogp(point) + + # Log-Probability of proposed state + proposal_logp = self.fastlogp(proposal) + + # Log probability of move, takes proposal symmetry into account to ensure detailed balance + move_logp = proposal_symmetry + proposal_logp - self.oldlogp + + # Accept / Reject + if (move_logp==np.nan): + print "Warning: Invalid proposal in MetropolisHastings.step(..) (logp is not a number)" + if move_logp!=np.nan and log(uniform()) < move_logp: + # Accept proposed value + newpoint = proposal + self.oldlogp = proposal_logp + else: + # Reject proposed value + newpoint = point# + + self.steps_until_tune -= 1 + + return newpoint + + def _ensure_tuned(self): + if self.tune and not self.steps_until_tune: + # Tune scaling parameter + acc_rate = self.accepted / float(self.tune_interval) + self.acceptance_rate.append(acc_rate) + self.scaling = mh_tune(self.scaling, acc_rate) + for p in self.proposals: + p.tune_stepsize(self.scaling) + # Reset counter + self.steps_until_tune = self.tune_interval + self.accepted = 0 + +def mh_tune(scale, acc_rate): + """ + Tunes the scaling parameter for the proposal distribution + according to the acceptance rate over the last tune_interval: + + Rate Variance adaptation + ---- ------------------- + <0.001 x 0.1 + <0.05 x 0.5 + <0.2 x 0.9 + >0.5 x 1.1 + >0.75 x 2 + >0.95 x 10 + + """ + #print "Acceptance rate: %f - scale: %f" % (acc_rate, scale) + # Switch statement + if acc_rate < 0.001: + # reduce by 90 percent + scale *= 0.1 + elif acc_rate < 0.05: + # reduce by 50 percent + scale *= 0.5 + elif acc_rate < 0.2: + # reduce by ten percent + scale *= 0.9 + elif acc_rate > 0.95: + # increase by factor of ten + scale *= 10.0 + elif acc_rate > 0.75: + # increase by double + scale *= 2.0 + elif acc_rate > 0.5: + # increase by ten percent + scale *= 1.1 + + return scale \ No newline at end of file
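For reference, the accept/reject rule implemented in MetropolisHastings.step(..) above can be summarized in a few lines of plain NumPy. This is only a minimal sketch of the logic, not part of the patch; the helper name mh_accept is made up for illustration. It uses np.isfinite for the validity check, since comparing a value against np.nan with == or != does not behave as a NaN test.

import numpy as np

def mh_accept(logp_old, logp_new, proposal_logp_difference, rng=np.random):
    # Hastings correction: log q(x_old | x_new) - log q(x_new | x_old).
    # For symmetric proposals this term is 0 and the rule reduces to plain Metropolis.
    move_logp = proposal_logp_difference + logp_new - logp_old
    if not np.isfinite(move_logp):
        return False  # reject proposals whose move log-probability is nan or -inf
    return np.log(rng.uniform()) < move_logp

# Example: an independence proposal that is twice as likely to generate the old
# point as the new one contributes log(2) to the correction term.
accepted = mh_accept(logp_old=-12.3, logp_new=-11.9,
                     proposal_logp_difference=np.log(2.0))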