diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index dd655ada61..248720101d 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -221,7 +221,7 @@ def __new__(cls, *args, **kwargs):
             potential_parent = cls.get_contexts()[-1]
             # We have to make sure that the context is a _DrawValuesContext
             # and not a Model
-            if isinstance(potential_parent, cls):
+            if isinstance(potential_parent, _DrawValuesContext):
                 instance._parent = potential_parent
             else:
                 instance._parent = None
@@ -235,7 +235,8 @@ def __init__(self):
             # another _DrawValuesContext will share the reference to the
             # drawn_vars dictionary. This means that separate branches
             # in the nested _DrawValuesContext context tree will see the
-            # same drawn values
+            # same drawn values.
+            # The drawn_vars keys are (RV, size) tuples
            self.drawn_vars = self.parent.drawn_vars
         else:
             self.drawn_vars = dict()
@@ -245,6 +246,22 @@ def parent(self):
         return self._parent
 
 
+class _DrawValuesContextBlocker(_DrawValuesContext, metaclass=InitContextMeta):
+    """
+    Context manager that starts a new drawn variables context, disregarding
+    all parent contexts. This can be used inside a random method to ensure
+    that the drawn values won't be the ones cached by previous calls.
+    """
+    def __new__(cls, *args, **kwargs):
+        # Intentionally ignore any parent context: always start empty
+        instance = super(_DrawValuesContextBlocker, cls).__new__(cls)
+        instance._parent = None
+        return instance
+
+    def __init__(self):
+        self.drawn_vars = dict()
+
+
 def is_fast_drawable(var):
     return isinstance(var, (numbers.Number,
                             np.ndarray,
@@ -288,14 +305,14 @@ def draw_values(params, point=None, size=None):
                 continue
 
             name = getattr(p, 'name', None)
-            if p in drawn:
+            if (p, size) in drawn:
                 # param was drawn in related contexts
-                v = drawn[p]
+                v = drawn[(p, size)]
                 evaluated[i] = v
             elif name is not None and name in point:
                 # param.name is in point
                 v = point[name]
-                evaluated[i] = drawn[p] = v
+                evaluated[i] = drawn[(p, size)] = v
             else:
                 # param still needs to be drawn
                 symbolic_params.append((i, p))
@@ -330,12 +347,12 @@ def draw_values(params, point=None, size=None):
                     named_nodes_children[k].update(nnc[k])
 
     # Init givens and the stack of nodes to try to `_draw_value` from
-    givens = {p.name: (p, v) for p, v in drawn.items()
+    givens = {p.name: (p, v) for (p, size), v in drawn.items()
              if getattr(p, 'name', None) is not None}
    stack = list(leaf_nodes.values())  # A queue would be more appropriate
    while stack:
        next_ = stack.pop(0)
-        if next_ in drawn:
+        if (next_, size) in drawn:
            # If the node already has a givens value, skip it
            continue
        elif isinstance(next_, (tt.TensorConstant,
@@ -364,14 +381,14 @@ def draw_values(params, point=None, size=None):
                                     givens=temp_givens,
                                     size=size)
                 givens[next_.name] = (next_, value)
-                drawn[next_] = value
+                drawn[(next_, size)] = value
             except theano.gof.fg.MissingInputError:
                 # The node failed, so we must add the node's parents to
                 # the stack of nodes to try to draw from. We exclude the
                 # nodes in the `params` list.
                 stack.extend([node for node in named_nodes_parents[next_]
                               if node is not None and
-                              node.name not in drawn and
+                              (node, size) not in drawn and
                               node not in params])
 
     # the below makes sure the graph is evaluated in order
@@ -386,15 +403,15 @@ def draw_values(params, point=None, size=None):
         missing_inputs = set()
         for param_idx in to_eval:
             param = params[param_idx]
-            if param in drawn:
-                evaluated[param_idx] = drawn[param]
+            if (param, size) in drawn:
+                evaluated[param_idx] = drawn[(param, size)]
             else:
                 try:  # might evaluate in a bad order,
                     value = _draw_value(param,
                                         point=point,
                                         givens=givens.values(),
                                         size=size)
-                    evaluated[param_idx] = drawn[param] = value
+                    evaluated[param_idx] = drawn[(param, size)] = value
                     givens[param.name] = (param, value)
                 except theano.gof.fg.MissingInputError:
                     missing_inputs.add(param_idx)
@@ -475,8 +492,11 @@ def _draw_value(param, point=None, givens=None, size=None):
                     # reset shape to account for shape changes
                     # with theano.shared inputs
                     dist_tmp.shape = np.array([])
-                    val = np.atleast_1d(dist_tmp.random(point=point,
-                                                        size=None))
+                    # We want to draw values to infer the dist_shape,
+                    # we don't want to store these drawn values in the context
+                    with _DrawValuesContextBlocker():
+                        val = np.atleast_1d(dist_tmp.random(point=point,
+                                                            size=None))
                     # Sometimes point may change the size of val but not the
                     # distribution's shape
                     if point and size is not None:
@@ -493,20 +513,34 @@ def _draw_value(param, point=None, givens=None, size=None):
             variables, values = list(zip(*givens))
         else:
             variables = values = []
-        func = _compile_theano_function(param, variables)
+        # We only truly care whether the ancestors of param that were given
+        # a value have matching dshape and val.shape
+        param_ancestors = \
+            set(theano.gof.graph.ancestors([param],
+                                           blockers=list(variables))
+                )
+        inputs = [(var, val) for var, val in
+                  zip(variables, values)
+                  if var in param_ancestors]
+        if inputs:
+            input_vars, input_vals = list(zip(*inputs))
+        else:
+            input_vars = []
+            input_vals = []
+        func = _compile_theano_function(param, input_vars)
         if size is not None:
             size = np.atleast_1d(size)
         dshaped_variables = all((hasattr(var, 'dshape')
-                                 for var in variables))
+                                 for var in input_vars))
         if (values and dshaped_variables and
             not all(var.dshape == getattr(val, 'shape', tuple())
-                    for var, val in zip(variables, values))):
-            output = np.array([func(*v) for v in zip(*values)])
+                    for var, val in zip(input_vars, input_vals))):
+            output = np.array([func(*v) for v in zip(*input_vals)])
         elif (size is not None and any((val.ndim > var.ndim)
-              for var, val in zip(variables, values))):
-            output = np.array([func(*v) for v in zip(*values)])
+              for var, val in zip(input_vars, input_vals))):
+            output = np.array([func(*v) for v in zip(*input_vals)])
         else:
-            output = func(*values)
+            output = func(*input_vals)
         return output
     raise ValueError('Unexpected type in draw_value: %s' % type(param))
 
 
@@ -515,7 +549,11 @@ def to_tuple(shape):
     """Convert ints, arrays, and Nones to tuples"""
     if shape is None:
         return tuple()
-    return tuple(np.atleast_1d(shape))
+    temp = np.atleast_1d(shape)
+    if temp.size == 0:
+        return tuple()
+    else:
+        return tuple(temp)
 
 
 def _is_one_d(dist_shape):
     if hasattr(dist_shape, 'dshape') and dist_shape.dshape in ((), (0,), (1,)):
diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index 817a6343d6..2333a83505 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -5,7 +5,8 @@
 from ..math import logsumexp
 from .dist_math import bound, random_choice
 from .distribution import (Discrete, Distribution, draw_values,
-                           generate_samples, _DrawValuesContext)
+                           generate_samples, _DrawValuesContext,
+                           _DrawValuesContextBlocker, to_tuple)
 from .continuous import get_tau_sigma, Normal
 
@@ -102,6 +103,35 @@ def __init__(self, w, comp_dists, *args, **kwargs):
 
         super().__init__(shape, dtype, defaults=defaults, *args, **kwargs)
 
+    @property
+    def comp_dists(self):
+        return self._comp_dists
+
+    @comp_dists.setter
+    def comp_dists(self, _comp_dists):
+        self._comp_dists = _comp_dists
+        # Test whether comp_dists can call random with a non-None size
+        with _DrawValuesContextBlocker():
+            if isinstance(self.comp_dists, (list, tuple)):
+                try:
+                    [comp_dist.random(size=23)
+                     for comp_dist in self.comp_dists]
+                    self._comp_dists_vect = True
+                except Exception:
+                    # The comp_dists cannot call random with a non-None size
+                    # or without knowledge of the point, so we assume that we
+                    # will have to iterate calls to random to get the right size
+                    self._comp_dists_vect = False
+            else:
+                try:
+                    self.comp_dists.random(size=23)
+                    self._comp_dists_vect = True
+                except Exception:
+                    # The comp_dists cannot call random with a non-None size
+                    # or without knowledge of the point, so we assume that we
+                    # will have to iterate calls to random to get the right size
+                    self._comp_dists_vect = False
+
     def _comp_logp(self, value):
         comp_dists = self.comp_dists
 
@@ -131,13 +161,33 @@ def _comp_modes(self):
                               axis=1))
 
     def _comp_samples(self, point=None, size=None):
-        try:
-            samples = self.comp_dists.random(point=point, size=size)
-        except AttributeError:
-            samples = np.column_stack([comp_dist.random(point=point, size=size)
-                                       for comp_dist in self.comp_dists])
-
-        return np.squeeze(samples)
+        if self._comp_dists_vect or size is None:
+            try:
+                return self.comp_dists.random(point=point, size=size)
+            except AttributeError:
+                samples = np.array([comp_dist.random(point=point, size=size)
+                                    for comp_dist in self.comp_dists])
+                samples = np.moveaxis(samples, 0, samples.ndim - 1)
+        else:
+            # We must iterate the calls to random manually
+            size = to_tuple(size)
+            _size = int(np.prod(size))
+            try:
+                samples = np.array([self.comp_dists.random(point=point,
+                                                           size=None)
+                                    for _ in range(_size)])
+                samples = np.reshape(samples, size + samples.shape[1:])
+            except AttributeError:
+                samples = np.array([[comp_dist.random(point=point, size=None)
+                                     for _ in range(_size)]
+                                    for comp_dist in self.comp_dists])
+                samples = np.moveaxis(samples, 0, samples.ndim - 1)
+                samples = np.reshape(samples, size + samples.shape[1:])
+
+        if samples.shape[-1] == 1:
+            return samples[..., 0]
+        else:
+            return samples
 
     def logp(self, value):
         w = self.w
 
@@ -147,42 +197,99 @@ def logp(self, value):
                      broadcast_conditions=False)
 
     def random(self, point=None, size=None):
+        # Convert size to tuple
+        size = to_tuple(size)
+        # Draw mixture weights and a sample from each mixture to infer shape
         with _DrawValuesContext() as draw_context:
-            w = draw_values([self.w], point=point)[0]
+            # We first need to check w and comp_tmp shapes and recompute size
+            w = draw_values([self.w], point=point, size=size)[0]
+            with _DrawValuesContextBlocker():
+                # We don't want to store the values drawn here in the context
+                # because they won't have the correct size
                 comp_tmp = self._comp_samples(point=point, size=None)
-        if np.asarray(self.shape).size == 0:
-            distshape = np.asarray(np.broadcast(w, comp_tmp).shape)[..., :-1]
+
+        # When size is not None, it's hard to tell the w parameter shape
+        if size is not None and w.shape[:len(size)] == size:
+            w_shape = w.shape[len(size):]
+        else:
+            w_shape = w.shape
+
+        # Try to determine parameter shape and dist_shape
+        param_shape = np.broadcast(np.empty(w_shape),
+                                   comp_tmp).shape
+        if np.asarray(self.shape).size != 0:
+            dist_shape = np.broadcast(np.empty(self.shape),
+                                      np.empty(param_shape[:-1])).shape
+        else:
+            dist_shape = param_shape[:-1]
+
+        # When size is not None, maybe dist_shape partially overlaps with size
+        if size is not None:
+            if size == dist_shape:
+                size = None
+            elif size[-len(dist_shape):] == dist_shape:
+                size = size[:len(size) - len(dist_shape)]
+
+        # We get an integer _size instead of a tuple size for drawing the
+        # mixture, then we just reshape the output
+        if size is None:
+            _size = None
         else:
-            distshape = np.asarray(self.shape)
+            _size = int(np.prod(size))
+
+        # Now we must broadcast w to the shape that considers size, dist_shape
+        # and param_shape. However, we must take care with the cases in which
+        # dist_shape and param_shape overlap
+        if size is not None and w.shape[:len(size)] == size:
+            if w.shape[:len(size + dist_shape)] != (size + dist_shape):
+                # To allow w to broadcast, we insert a new axis between the
+                # "size" axis and the "mixture" axis
+                _w = w[(slice(None),) * len(size) +  # Index the size axis
+                       (np.newaxis,) * len(dist_shape) +  # Add new axis for the dist_shape
+                       (slice(None),)]  # Close with the slice of mixture components
+                w = np.broadcast_to(_w, size + dist_shape + (param_shape[-1],))
+        elif size is not None:
+            w = np.broadcast_to(w, size + dist_shape + (param_shape[-1],))
+        else:
+            w = np.broadcast_to(w, dist_shape + (param_shape[-1],))
-        # Normalize inputs
-        w /= w.sum(axis=-1, keepdims=True)
+        # Compute the total size of the mixture's random call with size
+        if _size is not None:
+            output_size = int(_size * np.prod(dist_shape) * param_shape[-1])
+        else:
+            output_size = int(np.prod(dist_shape) * param_shape[-1])
+        # Get the size we need for the mixture's random call
+        mixture_size = int(output_size // np.prod(comp_tmp.shape))
+        if mixture_size == 1 and _size is None:
+            mixture_size = None
+
+        # Semiflatten the mixture weights. The last axis is the number of
The last axis is the number of + # mixture mixture components, and the rest is all about size, + # dist_shape and broadcasting + w = np.reshape(w, (-1, w.shape[-1])) + # Normalize mixture weights + w = w / w.sum(axis=-1, keepdims=True) w_samples = generate_samples(random_choice, p=w, broadcast_shape=w.shape[:-1] or (1,), - dist_shape=distshape, - size=size).squeeze() - if (size is None) or (distshape.size == 0): - with draw_context: - comp_samples = self._comp_samples(point=point, size=size) - if comp_samples.ndim > 1: - samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples]) - else: - samples = np.squeeze(comp_samples[w_samples]) + dist_shape=w.shape[:-1] or (1,), + size=size) + # Sample from the mixture + with draw_context: + mixed_samples = self._comp_samples(point=point, + size=mixture_size) + w_samples = w_samples.flatten() + # Semiflatten the mixture to be able to zip it with w_samples + mixed_samples = np.reshape(mixed_samples, (-1, comp_tmp.shape[-1])) + # Select the samples from the mixture + samples = np.array([mixed[choice] for choice, mixed in + zip(w_samples, mixed_samples)]) + # Reshape the samples to the correct output shape + if size is None: + samples = np.reshape(samples, dist_shape) else: - if w_samples.ndim == 1: - w_samples = np.reshape(np.tile(w_samples, size), (size,) + w_samples.shape) - samples = np.zeros((size,)+tuple(distshape)) - with draw_context: - for i in range(size): - w_tmp = w_samples[i, :] - comp_tmp = self._comp_samples(point=point, size=None) - if comp_tmp.ndim > 1: - samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp]) - else: - samples[i, :] = np.squeeze(comp_tmp[w_tmp]) - + samples = np.reshape(samples, size + dist_shape) return samples diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 04ae00e56f..7d6d7d4a84 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -226,12 +226,13 @@ def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, def random(self, point=None, size=None): if size is None: - size = [] + size = tuple() else: - try: - size = list(size) - except TypeError: - size = [size] + if not isinstance(size, tuple): + try: + size = tuple(size) + except TypeError: + size = (size,) if self._cov_type == 'cov': mu, cov = draw_values([self.mu, self.cov], point=point, size=size) @@ -242,23 +243,37 @@ def random(self, point=None, size=None): dist = stats.multivariate_normal( mean=mu, cov=cov, allow_singular=True) except ValueError: - size.append(mu.shape[-1]) + size += (mu.shape[-1],) return np.nan * np.zeros(size) return dist.rvs(size) elif self._cov_type == 'chol': - mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size) - if mu.shape[-1] != chol[0].shape[-1]: + mu, chol = draw_values([self.mu, self.chol_cov], + point=point, size=size) + if size and mu.ndim == len(size) and mu.shape == size: + mu = mu[..., np.newaxis] + if mu.shape[-1] != chol.shape[-1] and mu.shape[-1] != 1: raise ValueError("Shapes for mu and chol don't match") - - size.append(mu.shape[-1]) - standard_normal = np.random.standard_normal(size) - return mu + np.dot(standard_normal, chol.T) + broadcast_shape = ( + np.broadcast(np.empty(mu.shape[:-1]), + np.empty(chol.shape[:-2])).shape + ) + + mu = np.broadcast_to(mu, broadcast_shape + (chol.shape[-1],)) + chol = np.broadcast_to(chol, broadcast_shape + chol.shape[-2:]) + # If mu and chol were fixed by the point, only the standard normal + # should change + if mu.shape[:len(size)] != 
+                std_norm_shape = size + mu.shape
+            else:
+                std_norm_shape = mu.shape
+            standard_normal = np.random.standard_normal(std_norm_shape)
+            return mu + np.tensordot(standard_normal, chol, axes=[[-1], [-1]])
         else:
             mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
             if mu.shape[-1] != tau[0].shape[-1]:
                 raise ValueError("Shapes for mu and tau don't match")
-            size.append(mu.shape[-1])
+            size += (mu.shape[-1],)
             try:
                 chol = linalg.cholesky(tau, lower=True)
             except linalg.LinAlgError:
@@ -1001,7 +1016,7 @@ def __init__(self, eta, n, sd_dist, *args, **kwargs):
         self.n = n
         self.eta = eta
 
-        if 'transform' in kwargs:
+        if 'transform' in kwargs and kwargs['transform'] is not None:
             raise ValueError('Invalid parameter: transform.')
         if 'shape' in kwargs:
             raise ValueError('Invalid parameter: shape.')
@@ -1052,6 +1067,90 @@ def logp(self, x):
 
         return norm + logp_lkj + logp_sd + det_invjac
 
+    def _random(self, n, eta, size=1):
+        eta_sample_shape = (size,) + eta.shape
+        P = np.eye(n) * np.ones(eta_sample_shape + (n, n))
+        # Original implementation in R; see:
+        # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r
+        beta = eta - 1. + n/2.
+        r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1.
+        P[..., 0, 1] = r12
+        P[..., 1, 1] = np.sqrt(1. - r12**2)
+        for mp1 in range(2, n):
+            beta -= 0.5
+            y = stats.beta.rvs(a=mp1 / 2., b=beta, size=eta_sample_shape)
+            z = stats.norm.rvs(loc=0, scale=1, size=eta_sample_shape + (mp1,))
+            z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
+            P[..., 0:mp1, mp1] = np.sqrt(y[..., np.newaxis]) * z
+            P[..., mp1, mp1] = np.sqrt(1. - y)
+        C = np.einsum('...ji,...jk->...ik', P, P)
+        D = np.atleast_1d(self.sd_dist.random(size=P.shape[:-2]))
+        if D.shape in [tuple(), (1,)]:
+            D = self.sd_dist.random(size=P.shape[:-1])
+        elif D.ndim < C.ndim - 1:
+            D = [D] + [self.sd_dist.random(size=P.shape[:-2])
+                       for _ in range(n - 1)]
+            D = np.moveaxis(np.array(D), 0, C.ndim - 2)
+        elif D.ndim == C.ndim - 1:
+            if D.shape[-1] == 1:
+                D = [D] + [self.sd_dist.random(size=P.shape[:-2])
+                           for _ in range(n - 1)]
+                D = np.concatenate(D, axis=-1)
+            elif D.shape[-1] != n:
+                raise ValueError('The samples drawn from the '
+                                 'supplied sd_dist.random have the wrong '
+                                 'size. Expected {} but got {} instead.'.
+                                 format(n, D.shape[-1]))
+        else:
+            raise ValueError('Supplied sd_dist.random generates samples with '
+                             'too many dimensions. It must yield samples '
+                             'with 0 or 1 dimensions. Got {} instead'.
+                             format(D.ndim - C.ndim + 2))
+        C *= D[..., :, np.newaxis] * D[..., np.newaxis, :]
+        tril_idx = np.tril_indices(n, k=0)
+        return np.linalg.cholesky(C)[..., tril_idx[0], tril_idx[1]]
+
+    def random(self, point=None, size=None):
+        # Get parameters and broadcast them
+        n, eta = draw_values([self.n, self.eta], point=point, size=size)
+        broadcast_shape = np.broadcast(n, eta).shape
+        # We can only handle cov matrices with a constant n per random call
+        n = np.unique(n)
+        if len(n) > 1:
+            raise RuntimeError('Varying n is not supported for LKJCholeskyCov')
+        n = int(n[0])
+        dist_shape = ((n * (n + 1)) // 2,)
+        # We make sure that eta and the drawn n get their shapes broadcast
+        eta = np.broadcast_to(eta, broadcast_shape)
+        # We change the size of the draw depending on the broadcast shape
+        sample_shape = broadcast_shape + dist_shape
+        if size is not None:
+            if not isinstance(size, tuple):
+                try:
+                    size = tuple(size)
+                except TypeError:
+                    size = (size,)
+            if size == sample_shape:
+                size = None
+            elif size == broadcast_shape:
+                size = None
+            elif size[-len(sample_shape):] == sample_shape:
+                size = size[:len(size) - len(sample_shape)]
+            elif size[-len(broadcast_shape):] == broadcast_shape:
+                size = size[:len(size) - len(broadcast_shape)]
+        # We will always provide _random with an integer size and then reshape
+        # the output to get the correct size
+        if size is not None:
+            _size = np.prod(size)
+        else:
+            _size = 1
+        samples = self._random(n, eta, size=_size)
+        if size is None:
+            samples = samples[0]
+        else:
+            samples = np.reshape(samples, size + sample_shape)
+        return samples
+
 
 class LKJCorr(Continuous):
     R"""
diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py
index 43fd695dd7..81ba1c055a 100644
--- a/pymc3/distributions/transforms.py
+++ b/pymc3/distributions/transforms.py
@@ -446,7 +446,7 @@ def forward(self, y):
         return tt.advanced_set_subtensor1(y, tt.log(y[self.diag_idxs]), self.diag_idxs)
 
     def forward_val(self, y, point=None):
-        y[self.diag_idxs] = np.log(y[self.diag_idxs])
+        y[..., self.diag_idxs] = np.log(y[..., self.diag_idxs])
         return y
 
     def jacobian_det(self, y):
diff --git a/pymc3/model.py b/pymc3/model.py
index 67ab20f7b9..4e0f7ec10a 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -121,6 +121,8 @@ def _get_named_nodes_and_relations(graph, parent, leaf_nodes,
                 except KeyError:
                     node_parents[graph] = {parent}
                 node_children[parent].add(graph)
+            else:
+                node_parents[graph] = set()
            # Flag that the leaf node has no children
            node_children[graph] = set()
        else:  # Intermediate node
@@ -131,6 +133,8 @@ def _get_named_nodes_and_relations(graph, parent, leaf_nodes,
                except KeyError:
                    node_parents[graph] = {parent}
                node_children[parent].add(graph)
+            else:
+                node_parents[graph] = set()
            # The current node will be set as the parent of the next
            # nodes only if it is a named node
            parent = graph
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index b5c0593806..0a2a380ce6 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -8,7 +8,9 @@
 import theano
 
 import pymc3 as pm
-from pymc3.distributions.distribution import draw_values
+from pymc3.distributions.distribution import (draw_values,
+                                              _DrawValuesContext,
+                                              _DrawValuesContextBlocker)
 from .helpers import SeededTest
 from .test_distributions import (
     build_model, Domain, product, R, Rplus, Rplusbig, Runif, Rplusdunif,
@@ -114,6 +116,44 @@ def test_random_sample_returns_nd_array(self):
         assert isinstance(tau, np.ndarray)
 
 
+class TestDrawValuesContext:
+    def test_normal_context(self):
+        with _DrawValuesContext() as context0:
+            assert context0.parent is None
+            context0.drawn_vars['root_test'] = 1
+            with _DrawValuesContext() as context1:
+                assert id(context1.drawn_vars) == id(context0.drawn_vars)
+                assert context1.parent == context0
+                with _DrawValuesContext() as context2:
+                    assert id(context2.drawn_vars) == id(context0.drawn_vars)
+                    assert context2.parent == context1
+                    context2.drawn_vars['leaf_test'] = 2
+                assert context1.drawn_vars['leaf_test'] == 2
+                context1.drawn_vars['root_test'] = 3
+            assert context0.drawn_vars['root_test'] == 3
+            assert context0.drawn_vars['leaf_test'] == 2
+
+    def test_blocking_context(self):
+        with _DrawValuesContext() as context0:
+            assert context0.parent is None
+            context0.drawn_vars['root_test'] = 1
+            with _DrawValuesContext() as context1:
+                assert id(context1.drawn_vars) == id(context0.drawn_vars)
+                assert context1.parent == context0
+                with _DrawValuesContextBlocker() as blocker:
+                    assert id(blocker.drawn_vars) != id(context0.drawn_vars)
+                    assert blocker.parent is None
+                    blocker.drawn_vars['root_test'] = 2
+                    with _DrawValuesContext() as context2:
+                        assert id(context2.drawn_vars) == id(blocker.drawn_vars)
+                        assert context2.parent == blocker
+                        context2.drawn_vars['root_test'] = 3
+                        context2.drawn_vars['leaf_test'] = 4
+                    assert blocker.drawn_vars['root_test'] == 3
+                assert 'leaf_test' not in context1.drawn_vars
+            assert context0.drawn_vars['root_test'] == 1
+
+
 class BaseTestCases:
     class BaseTestCase(SeededTest):
         shape = 5
diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py
index 54be0e6179..de6a95fc44 100644
--- a/pymc3/tests/test_mixture.py
+++ b/pymc3/tests/test_mixture.py
@@ -2,6 +2,7 @@
 from numpy.testing import assert_allclose
 
 from .helpers import SeededTest
+import pymc3 as pm
 from pymc3 import Dirichlet, Gamma, Normal, Lognormal, Poisson, Exponential, \
     Mixture, NormalMixture, MvNormal, sample, Metropolis, Model
 import scipy.stats as st
@@ -231,3 +232,55 @@ def mixmixlogp(value, point):
         assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))
         assert_allclose(priorlogp + mixmixlogpg.sum(),
                         model.logp(test_point))
+
+    def test_sample_prior_and_posterior(self):
+        def build_toy_dataset(N, K):
+            pi = np.array([0.2, 0.5, 0.3])
+            mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
+            stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
+            x = np.zeros((N, 3), dtype=np.float32)
+            y = np.zeros((N,), dtype=np.int)
+            for n in range(N):
+                k = np.argmax(np.random.multinomial(1, pi))
+                x[n, :] = np.random.multivariate_normal(mus[k],
+                                                        np.diag(stds[k]))
+                y[n] = k
+            return x, y
+
+        N = 100  # number of data points
+        K = 3  # number of mixture components
+        D = 3  # dimensionality of the data
+
+        X, y = build_toy_dataset(N, K)
+
+        with pm.Model() as model:
+            pi = pm.Dirichlet('pi', np.ones(K))
+
+            comp_dist = []
+            mu = []
+            packed_chol = []
+            chol = []
+            for i in range(K):
+                mu.append(pm.Normal('mu%i' % i, 0, 10, shape=D))
+                packed_chol.append(
+                    pm.LKJCholeskyCov('chol_cov_%i' % i,
+                                      eta=2,
+                                      n=D,
+                                      sd_dist=pm.HalfNormal.dist(2.5))
+                )
+                chol.append(pm.expand_packed_triangular(D, packed_chol[i],
+                                                        lower=True))
+                comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i]))
+
+            pm.Mixture('x_obs', pi, comp_dist, observed=X)
+        with model:
+            trace = pm.sample(30, tune=10, chains=1)
+
+        n_samples = 20
+        with model:
+            ppc = pm.sample_posterior_predictive(trace, n_samples)
+            prior = pm.sample_prior_predictive(samples=n_samples)
+        assert ppc['x_obs'].shape == (n_samples,) + X.shape
+        assert prior['x_obs'].shape == (n_samples,) + X.shape
+        assert prior['mu0'].shape == (n_samples, D)
+        assert prior['chol_cov_0'].shape == (n_samples, D * (D + 1) // 2)
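
A minimal usage sketch (not part of the patch) of the cache-isolation behaviour that the changes above introduce and that TestDrawValuesContext.test_blocking_context exercises. It assumes the patched pymc3.distributions.distribution module; the ('some_rv', None) key is purely illustrative of the new (RV, size) keying.

    from pymc3.distributions.distribution import (_DrawValuesContext,
                                                   _DrawValuesContextBlocker)

    with _DrawValuesContext() as outer:
        # After this patch, drawn_vars is keyed by (RV, size) tuples
        outer.drawn_vars[('some_rv', None)] = 1.0
        with _DrawValuesContextBlocker() as blocker:
            # The blocker has no parent and starts with an empty cache, so
            # anything stored here never leaks back into `outer`
            assert blocker.parent is None
            assert ('some_rv', None) not in blocker.drawn_vars
            blocker.drawn_vars[('some_rv', None)] = 2.0
        assert outer.drawn_vars[('some_rv', None)] == 1.0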