
Efficient sampling from mixture models #547

Closed

wants to merge 14 commits into from

Changes from 11 commits
3 changes: 2 additions & 1 deletion pymc/distributions/continuous.py
@@ -8,6 +8,7 @@
from __future__ import division

from .dist_math import *
import numpy
from numpy.random import uniform as runiform, normal as rnormal

__all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace',
@@ -54,7 +55,7 @@ def logp(self, value):
        upper = self.upper

        return bound(
            -log(upper - lower),
            -log(upper - lower), # Inserted cast to prevent exception in theano.ifelse.ifelse
            lower <= value, value <= upper)

    def random(self, size=None):
2 changes: 1 addition & 1 deletion pymc/distributions/discrete.py
@@ -285,7 +285,7 @@ def logp(self, value):
        p = self.p
        k = self.k

        return bound(log(p[value]),
        return bound_scalar(t.sum(log(p[value])),
            value >= 0,
            value <= (k - 1),
            le(abs(sum(p) - 1), 1e-5))
52 changes: 47 additions & 5 deletions pymc/distributions/dist_math.py
@@ -13,31 +13,68 @@
zeros_like, ones, ones_like,
concatenate, constant, argmax)

import theano

from numpy import pi, inf, nan
from .special import gammaln, multigammaln

from numpy import pi, inf, nan, float64, array
from .special import gammaln, multigammaln
from theano.ifelse import ifelse
from theano.printing import Print
from .distribution import *

def guess_is_scalar(c):
    bcast = None
    if (isinstance(c, t.TensorType)):
        return c.broadcastable == ()

    if (isinstance(c, t.Variable) or isinstance(c, t.Constant)):
        return c.type.broadcastable == ()

    tt = t.as_tensor_variable(c)
Member

Do you need the parts above? won't just t.as_tensor_variable(c).type.broadcastable == () do everything you need?

Author

Hi,

I think I tried something like that, and it failed in some example or test case. Don't remember the details, though.

Kai
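A minimal sketch of the property both variants rely on, assuming the Theano tensor API of the time: a scalar tensor has an empty broadcastable pattern, while vectors and higher-rank tensors do not, so checking type.broadcastable == () identifies scalars.

import theano.tensor as t

x = t.dscalar('x')
v = t.dvector('v')
print(x.type.broadcastable)                          # () -> scalar
print(v.type.broadcastable)                          # (False,) -> not a scalar
print(t.as_tensor_variable(3.0).type.broadcastable)  # () -> plain numbers wrap to scalar constants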


    return tt.type.broadcastable == ()

def bound(logp, *conditions):
impossible = constant(-inf, dtype=float64) # We re-use that constant

def bound_scalar(logp, *conditions):
Member

Could we remove bound_scalar and replace it with bound or is there something bound below can't do?

Author

I tried to write a generic version of bound that can deal with all the kinds of different inputs that bound is actually fed with. Notable variants of arguments to bound included:

logp: can be a theano scalar or a theano vector (in practice, not by contract)
conditions: if logp is a vector, these can be (mixed) vectors or scalars. If logp is a scalar, these are, too.

In the case that logp is a vector, I tried to write a general version which used scan and theano.ifelse to handle every case correctly. It's hard to exactly mimic theano.switch behaviour using scan and ifelse, though.

So, in order not to introduce any new bugs, I fell back to the old solution using switch, except for the case where I know logp to be a scalar, which is the only specific case where I knew using switch introduced a bug.

Author

theano.switch evaluates both alternatives it switches between, regardless of the conditions, unlike ifelse, which evaluates only the expression it actually returns.

Consequently, the case that bound using switch doesn't handle correctly is when I feed it a logp which leads to an exception if one of the conditions is violated. This can happen in Categorical.logp, since the logp is created by looking up an index in a weights vector. If the index is out of bounds, an exception gets raised. This should be prevented by the conditions to bound (which check whether the index is out of bounds), but since switch evaluates the (invalid) logp anyway, an exception gets thrown.

Apart from that, since switch evaluates dead expression branches, it's inefficient as hell.
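A small sketch of the difference described here, assuming Theano's standard switch/ifelse semantics (the names p, idx and f_* are illustrative, not from the PR): switch builds both branches into the graph and computes them eagerly, so an out-of-bounds lookup in the branch that is never returned still raises, while the lazy ifelse only computes the branch it selects.

import numpy as np
import theano
import theano.tensor as t
from theano.ifelse import ifelse

p = t.dvector('p')      # weight vector
idx = t.iscalar('idx')  # category index, possibly out of bounds
ok = t.ge(idx, 0) & t.lt(idx, p.shape[0])

# Eager: p[idx] is computed even when ok is False -> raises for idx = 5
f_switch = theano.function([p, idx], t.switch(ok, t.log(p[idx]), -np.inf))
# Lazy: only the selected branch is computed -> safely returns -inf
f_ifelse = theano.function([p, idx], ifelse(ok, t.log(p[idx]), t.constant(-np.inf)))

w = np.array([0.2, 0.3, 0.5])
print(f_ifelse(w, 5))   # -inf
# f_switch(w, 5)        # would raise an out-of-bounds error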

Member

I'm fine with this, since most users will be using Bounded and not bound.

"""
Bounds a log probability density with several conditions

Parameters
----------
logp : float
*conditionss : booleans
*conditions : booleans

Returns
-------
logp if all conditions are true
-inf if some are false
"""
cond = alltrue(conditions)
return ifelse(cond, t.cast(logp, dtype='float64'), impossible)


def bound(logp, *conditions):
    """
    Bounds a log probability density with several conditions

    return switch(alltrue(conditions), logp, -inf)
    Parameters
    ----------
    logp : float
    *conditionss : booleans

    Returns
    -------
    logp if all conditions are true
    -inf if some are false
    """
    if (guess_is_scalar(logp)):
        for c in conditions:
            if (type(c)==bool):
                continue
            if (not guess_is_scalar(c)):
                break
        return bound_scalar(logp, *conditions)
    return switch(alltrue(conditions), logp, impossible)

def alltrue(vals):
    ret = 1
@@ -52,6 +89,11 @@ def logpow(x, m):
"""
return switch(eq(x, 0) & eq(m, 0), 0, m * log(x))

def logpow_relaxed(x, m):
"""
Calculates log(x**m), unless m==0 or x==0 in which case it will return 0
"""
return switch(eq(x, 0) | eq(m, 0), 0, m * log(x))

def factln(n):
    return gammaln(n + 1)
117 changes: 117 additions & 0 deletions pymc/distributions/mixtures.py
@@ -0,0 +1,117 @@


from pymc import *

from numpy.random import normal
import numpy as np
import pylab as pl
from itertools import product
import theano
import theano.tensor as T
from theano.ifelse import ifelse
from numpy import pi
from numpy.random import uniform, multivariate_normal
from pymc.model import FreeRV, ObservedRV
import numpy as np
import numpy.linalg as np_linalg
from pymc.step_methods.metropolis_hastings import GenericProposal

"""
Multivariate Mixture distributions.

These distributions have a lot of potential uses. For one thing: they can be used to approximate any other density function,
and there are a lot of algorithms for efficiently creating density estimates (i.e. Kernel Density Estimates (KDE),
Gaussian Mixture Model learning, and others).
Member

Would be good to have PEP8 compliance and not use line lengths > 80. autopep8 can probably fix all this.


Once we have learned such a density, we could use it as a prior density. To do that, we need to be able to
efficiently sample from it. Luckily, that's simple and efficient. Unless you use the wrong proposal distribution, of course.

That's where these classes come in handy.

@author Kai Londenberg ( [email protected] )

"""
class MvGaussianMixture(Continuous):

    def __init__(self, shape, cat_var, mus, taus, model=None, *args, **kwargs):
Member

I recommend using singular mu and tau rather than plurals. It should be obvious that they are vectors, since it's explicitly a multivariate variable.

        '''
        Creates a continuous mixture distribution which can be efficiently evaluated and sampled from.

        Args:
            shape: Shape of the distribution. All kernels need to have this same shape as well.
            weights: weight vector (may be a theano shared variable or expression. Must be able to evaluate this in the context of the model).
            kernels: list of mixture component distributions. These have to be MixtureKernel instances (for example MvNormalKernel) of same shape
            testval: Test value for logp computations
        '''
        super(MvGaussianMixture, self).__init__(*args, shape=shape, **kwargs)
        assert isinstance(cat_var, FreeRV)
        assert isinstance(cat_var.distribution, Categorical)
        self.cat_var = cat_var
        self.model = modelcontext(model)
        weights = cat_var.distribution.p
        self.weights = weights
        self.mus = mus
        self.taus = taus
        self.mu_t = T.stacklists(mus)
        self.tau_t = T.stacklists(taus)
        self.shape = shape
        self.testval = np.zeros(self.shape, self.dtype)
        self.last_cov_value = {}
        self.last_tau_value = {}
        self.param_fn = None

    def logp(self, value):
        mu = self.mu_t[self.cat_var]
        tau = self.tau_t[self.cat_var]

        delta = value - mu
        k = tau.shape[0]

        return 1/2. * (-k * log(2*pi) + log(det(tau)) - dot(delta.T, dot(tau, delta)))

    def draw_mvn(self, mu, cov):

        return np.random.multivariate_normal(mean=mu, cov=cov)

    def draw(self, point):
Member

Here and above, lets use random instead of draw, to be more consistent with PyMC 2. I have a branch that I am working on that implements RNGs for all distributions.

        if (self.param_fn is None):
            self.param_fn = model.fastfn([self.cat_var, self.mu_t[self.cat_var], self.tau_t[self.cat_var]])

        cat, mu, tau = self.param_fn(point)
        # Cache cov = inv(tau) for each value of cat
        cat = int(cat)
        last_tau = self.last_tau_value.get(cat, None)
        if (last_tau is not None and np.allclose(tau,last_tau)):
            mcov = self.last_cov_value[cat]
        else:
            mcov = np_linalg.inv(tau)
            self.last_cov_value[cat] = mcov
            self.last_tau_value[cat] = tau

        return self.draw_mvn(mu, mcov)

_impossible = float('-inf')
Member

Maybe call this neg_inf or outside_support?

class GMMProposal(GenericProposal):
Member

Do you define GMM anywhere?


    def __init__(self, gmm_var, model=None):
        super(GMMProposal, self).__init__([gmm_var])
        assert isinstance(gmm_var.distribution, MvGaussianMixture)
        model = modelcontext(model)
        self.gmm_var = gmm_var
        self.logpfunc = model.fastfn(gmm_var.distribution.logp(gmm_var))
        self._proposal_logp_difference = 0.0


    def propose_move(self, from_point, point):
        old_logp = self.logpfunc(from_point)
        point[self.gmm_var.name] = self.gmm_var.distribution.draw(point)
        new_logp = self.logpfunc(point)
        if (old_logp!=np.nan and old_logp!=_impossible):
            self._proposal_logp_difference = old_logp-new_logp
        else:
            self._proposal_logp_difference = 0.0
        return point

    def proposal_logp_difference(self):
        return self._proposal_logp_difference
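For reference, the expression MvGaussianMixture.logp builds is the standard multivariate normal log-density of the component selected by cat_var; in the notation of the code, with k the dimensionality,

log p(value | c) = 1/2 * ( -k * log(2*pi) + log|tau_c| - (value - mu_c)^T tau_c (value - mu_c) )

GMMProposal compiles this same expression via model.fastfn and reports old_logp - new_logp through proposal_logp_difference() as the proposal correction for the MetropolisHastings step method.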

10 changes: 8 additions & 2 deletions pymc/distributions/transforms.py
@@ -34,8 +34,14 @@ def __init__(self, dist, transform, *args, **kwargs):
        transform : Transform
        args, kwargs
            arguments to Distribution"""
        forward = transform.forward
        forward = transform.forward
        defaults = dist.defaults
        try:
            # If we don't do this, we will have problems
            # with invalid mode values of transformed Dirichlet variables where one of the hyperparams is exactly 1
            self.default_val = forward(dist.default())
            defaults = ['default_val'] + defaults

            testval = forward(dist.testval)
        except TypeError:
            testval = dist.testval
@@ -52,7 +58,7 @@ def __init__(self, dist, transform, *args, **kwargs):

        super(TransformedDistribution, self).__init__(v.shape.tag.test_value,
                                                      v.dtype,
                                                      testval, dist.defaults,
                                                      testval, defaults,
                                                      *args, **kwargs)


135 changes: 135 additions & 0 deletions pymc/examples/gmm_mixture_sampling.py
@@ -0,0 +1,135 @@
import numpy as np
from pymc import *

from pymc.step_methods.metropolis_hastings import *
from pymc.distributions.mixtures import *
from pymc.distributions.special import gammaln
from pymc.distributions.dist_math import *

import theano
import theano.tensor as T
from theano.ifelse import ifelse as theano_ifelse
import pandas
import matplotlib.pylab as plt
import numpy.linalg

theano.config.mode = 'FAST_COMPILE'
theano.config.compute_test_value = 'raise'

'''
Example which demonstrates the use of the MvGaussianMixture as a flexible (i.e. arbitrary) Prior
and the usage of the MetropolisHastings step method with a custom Proposal
'''

# Some helper methods
def is_pos_def(x):
    return np.all(np.linalg.eigvals(x) > 0)

def tmin(a,b):
    return T.switch(T.lt(a,b), a, b)

def tmax(a,b):
    return T.switch(T.gt(a,b), a, b)

''' We construct a number of valid
parameters for the multivariate normal.

In order to ensure that the covariance matrices are positive semidefinite, they are raised to the (matrix-) power of two.
Precision matrices are obtained by inverting the covariance matrices.
'''
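A quick numeric illustration of why that construction works (a sketch, separate from the example script itself, with an arbitrary symmetric matrix A): squaring a symmetric matrix squares its eigenvalues, so the result is positive semidefinite and can be inverted to obtain the precision matrix.

import numpy as np

A = np.array([[0.28, 0.8],
              [0.8, 0.28]])              # symmetric, but indefinite
print(np.linalg.eigvals(A))              # one eigenvalue is negative
A2 = np.linalg.matrix_power(A, 2)
print(np.linalg.eigvals(A2))             # both eigenvalues are now positive
tau = np.linalg.inv(A2)                  # precision matrix, as used below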
def create_model(with_observations=False):

    # First, we create some parameters for a Multivariate Normal
    mu0 = np.ones((2), dtype=np.float64)
    cov0 = np.eye(2, dtype=np.float64) * 0.2
    tau0 = numpy.linalg.inv(cov0)

    mu1 = mu0+5.5
    cov1 = cov0.copy() * 1.4
    cov1[0,1] = 0.8
    cov1[1,0] = 0.8
    cov1 = np.linalg.matrix_power(cov1,2)
    tau1 = numpy.linalg.inv(cov1)

    mu2 = mu0-2.5
    cov2 = cov0.copy() * 1.4
    cov2[0,1] = 0.8
    cov2[1,0] = 0.8
    cov2 = np.linalg.matrix_power(cov2,2)
    tau2 = numpy.linalg.inv(cov2)

    mu3 = mu0*-0.5 + 2.0*mu2
    cov3 = cov0.copy() * 0.9
    cov3[0,1] = -0.8
    cov3[1,0] = -0.8
    cov3 = np.linalg.matrix_power(cov3,2)
    tau3 = numpy.linalg.inv(cov3)

    m2 = np.array([3.0])
    aval = np.array([4, 3, 2., 1.])
    a = constant(aval)

    model = Model()

    with model:
        k = 4
        p, p_m1 = model.TransformedVar(
            'p', Dirichlet.dist(a, shape=k),
            simplextransform)

        c = Categorical('c', p)
        gmm = MvGaussianMixture('gmm', shape=mu0.shape, cat_var=c, mus=[mu0,mu1,mu2,mu3], taus=[tau0, tau1,tau2,tau3])
        ' Now, we ensure these arbitrary values sum to one. What do we get ? A nice probability which we can, in turn, sample from (or observe)'
        fac = T.constant( np.array([10.0], dtype=np.float64))
        gmm_p = Deterministic('gmm_p', (tmax(tmin(gmm[0], fac) / fac, T.constant(np.array([0.0], dtype=np.float64)))))
        pbeta = Beta('pbeta', 1,1)
        if (with_observations):
            result = Bernoulli('result', p=gmm_p[0], observed=np.array([1,0,1], dtype=np.bool))
        else:
            result = Bernoulli('result', p=gmm_p[0])
        ' Try this with a Metropolis instance, and watch it fail ..'
        step = MetropolisHastings(vars=model.vars, proposals=[GMMProposal(gmm)])
    return model, step

def plot_scatter_matrix(title, tr, fig=None):
    if (fig is None):
        fig = plt.Figure()
    t6 = pandas.Series(tr['c'])
    t8 = pandas.Series(tr['gmm'][:,0])
    t9 = pandas.Series(tr['gmm'][:,1])
    t10 = pandas.Series(tr['gmm_p'][:,0])
    t11 = pandas.Series(tr['pbeta'])
    df = pandas.DataFrame({'cat' : t6, 'gmm_0' : t8, 'gmm_1' : t9, 'p' : t10, 'pbeta' : t11})
    pandas.scatter_matrix(df)
    plt.title(title)
    return fig

def plot_p_prior_vs_posterior(prior_trace, posterior_trace, fig=None):
    if (fig is None):
        fig = plt.Figure()
    pr = pandas.Series(prior_trace['gmm_p'][:,0])
    po = pandas.Series(posterior_trace['gmm_p'][:,0])
    plt.hist(pr, bins=40, range=(-0.1, 1.1), color='b', alpha=0.5, label='Prior')
    plt.hist(po, bins=40, range=(-0.1, 1.1), color='g', alpha=0.5, label='Posterior')
    plt.legend()
    plt.show()

def run(n=10000):
    m1, step1 = create_model(False)
    m2, step2 = create_model(True)
    with m1:
        trace1 = sample(n, step1)
    with m2:
        trace2 = sample(n, step2)
    fig1 = plot_scatter_matrix('Prior Scatter Matrix', trace1)
    plt.show()
    fig2 = plot_scatter_matrix('Posterior Scatter Matrix', trace2)
    plt.show()
    plot_p_prior_vs_posterior(trace1, trace2)
    plt.show()

if __name__ == '__main__':
    run()
    print "Done"

