From 803c49c9d1255d2b8b6e167f68dfdfb2b6448fa8 Mon Sep 17 00:00:00 2001 From: Luciano Paz Date: Wed, 5 Dec 2018 11:27:06 +0100 Subject: [PATCH 1/7] Partial fix. Got stumped with LKJCholeskyCov not having a random method --- pymc3/distributions/distribution.py | 24 ++++++++- pymc3/distributions/mixture.py | 81 ++++++++++++++++++----------- pymc3/distributions/multivariate.py | 44 ++++++++++++++++ pymc3/model.py | 4 ++ 4 files changed, 122 insertions(+), 31 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 0dcb6bdb48..0cf8770deb 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -249,6 +249,23 @@ def parent(self): return self._parent +class _DrawValuesOutContext(six.with_metaclass(InitContextMeta, + _DrawValuesContext)): + """ + Context manager that starts a new drawn variables context disregarding all + parent contexts. This can be used inside a random method to ensure that + the drawn values wont be the ones cached by previous calls + """ + def __new__(cls, *args, **kwargs): + # resolves the parent instance + instance = super(_DrawValuesOutContext, cls).__new__(cls) + instance._parent = None + return instance + + def __init__(self): + self.drawn_vars = dict() + + def is_fast_drawable(var): return isinstance(var, (numbers.Number, np.ndarray, @@ -519,7 +536,11 @@ def to_tuple(shape): """Convert ints, arrays, and Nones to tuples""" if shape is None: return tuple() - return tuple(np.atleast_1d(shape)) + temp = np.atleast_1d(shape) + if temp.size == 0: + return tuple() + else: + return tuple(temp) def _is_one_d(dist_shape): if hasattr(dist_shape, 'dshape') and dist_shape.dshape in ((), (0,), (1,)): @@ -586,6 +607,7 @@ def generate_samples(generator, *args, **kwargs): dist_shape = to_tuple(dist_shape) broadcast_shape = to_tuple(broadcast_shape) size_tup = to_tuple(size) + print(broadcast_shape, type(broadcast_shape)) # All inputs are scalars, end up size (size_tup, dist_shape) if broadcast_shape in {(), (0,), (1,)}: diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index 08a3379be7..d6ff5e5c6d 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -5,7 +5,8 @@ from ..math import logsumexp from .dist_math import bound, random_choice from .distribution import (Discrete, Distribution, draw_values, - generate_samples, _DrawValuesContext) + generate_samples, _DrawValuesContext, + _DrawValuesOutContext) from .continuous import get_tau_sd, Normal @@ -133,12 +134,16 @@ def _comp_modes(self): def _comp_samples(self, point=None, size=None): try: - samples = self.comp_dists.random(point=point, size=size) + return self.comp_dists.random(point=point, size=size) except AttributeError: - samples = np.column_stack([comp_dist.random(point=point, size=size) - for comp_dist in self.comp_dists]) + samples = np.array([comp_dist.random(point=point, size=size) + for comp_dist in self.comp_dists]) + samples = np.moveaxis(samples, 0, samples.ndim - 1) - return np.squeeze(samples) + if samples.shape[-1] == 1: + return samples[..., 0] + else: + return samples def logp(self, value): w = self.w @@ -149,40 +154,56 @@ def logp(self, value): def random(self, point=None, size=None): with _DrawValuesContext() as draw_context: - w = draw_values([self.w], point=point)[0] + # We first need to check w and comp_tmp shapes and re compute size + w = draw_values([self.w], point=point, size=size)[0] + with _DrawValuesOutContext(): + # We don't want to store the values drawn here in the 
context + # because they wont have the correct size comp_tmp = self._comp_samples(point=point, size=None) - if np.asarray(self.shape).size == 0: - distshape = np.asarray(np.broadcast(w, comp_tmp).shape)[..., :-1] + param_shape = np.broadcast(w, + comp_tmp).shape + if np.asarray(self.shape).size != 0: + distshape = np.broadcast(np.empty(self.shape), + np.empty(param_shape[:-1])).shape + else: + distshape = param_shape[:-1] + w = np.broadcast_to(w, distshape + (param_shape[-1],)) + if size is not None and not isinstance(size, tuple): + size = tuple(size) + if size is not None: + if size == distshape: + size = None + _size = -1 + elif size[-len(distshape):] == distshape: + size = size[:len(size) - len(distshape)] + _size = np.prod(size) + else: + _size = np.prod(size) else: - distshape = np.asarray(self.shape) + _size = 1 + output_size = _size * np.prod(distshape) * param_shape[-1] + underlying_size = output_size // np.prod(comp_tmp.shape) # Normalize inputs - w /= w.sum(axis=-1, keepdims=True) + w = np.reshape(w, (-1, w.shape[-1])) + w = w / w.sum(axis=-1, keepdims=True) w_samples = generate_samples(random_choice, p=w, broadcast_shape=w.shape[:-1] or (1,), - dist_shape=distshape, - size=size).squeeze() - if (size is None) or (distshape.size == 0): - with draw_context: - comp_samples = self._comp_samples(point=point, size=size) - if comp_samples.ndim > 1: - samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples]) - else: - samples = np.squeeze(comp_samples[w_samples]) + dist_shape=w.shape[:-1] or (1,), + size=size) + with draw_context: + mixed_samples = self._comp_samples(point=point, + size=underlying_size) + w_samples = w_samples.flatten() + mixed_samples = np.reshape(mixed_samples, (-1, comp_tmp.shape[-1])) + samples = np.array([mixed[choice] for choice, mixed in + zip(w_samples, mixed_samples)]) + if size is None: + samples = np.reshape(samples, distshape) else: - if w_samples.ndim == 1: - w_samples = np.reshape(np.tile(w_samples, size), (size,) + w_samples.shape) - samples = np.zeros((size,)+tuple(distshape)) - with draw_context: - for i in range(size): - w_tmp = w_samples[i, :] - comp_tmp = self._comp_samples(point=point, size=None) - if comp_tmp.ndim > 1: - samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp]) - else: - samples[i, :] = np.squeeze(comp_tmp[w_tmp]) + samples = np.reshape(samples, size + distshape) return samples diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index d233055282..90b9986251 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -998,6 +998,49 @@ def logp(self, x): return norm + logp_lkj + logp_sd + det_invjac + def _random(self, n, eta, size=None): + size = size if isinstance(size, tuple) else (size,) + # original implementation in R see: + # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r + beta = eta - 1. + n/2. + r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=size) - 1. + P = np.eye(n)[:, :, np.newaxis] * np.ones(size) + P[0, 1] = r12 + P[1, 1] = np.sqrt(1. - r12**2) + for mp1 in range(2, n): + beta -= 0.5 + y = stats.beta.rvs(a=mp1 / 2., b=beta, size=size) + z = stats.norm.rvs(loc=0, scale=1, size=(mp1, ) + size) + z = z / np.sqrt(np.einsum('ij,ij->j', z, z)) + P[0:mp1, mp1] = np.sqrt(y) * z + P[mp1, mp1] = np.sqrt(1. 
- y) + C = np.einsum('ji...,jk...->...ik', P, P) + D = np.atleast_1d(self.sd_dist.random(size=size)) + if D.shape == size: + D = np.array([D] + [self.sd_dist.random(size=size) + for _ in range(C.shape[-1] - 1)]) + D = np.moveaxis(np.array(D), 0, D.ndim - 1) + elif D.shape[-1] == 1: + D = [D] + [self.sd_dist.random(size=size) + for _ in range(C.shape[-1] - 1)] + D = np.concatenate(D, axis=-1) + try: + C *= D[..., :, np.newaxis] * D[..., np.newaxis, :] + except TypeError: + pass + chol = np.linalg.cholesky(C) + tril_idx = np.tril_indices(n, k=0) + return chol[..., tril_idx[0], tril_idx[1]] + + def random(self, point=None, size=None): + n, eta = draw_values([self.n, self.eta], point=point, size=size) + size= 1 if size is None else size + samples = generate_samples(self._random, n, eta, + dist_shape=self.shape, + broadcast_shape=self.shape, + size=size) + return samples + class LKJCorr(Continuous): R""" @@ -1097,6 +1140,7 @@ def random(self, point=None, size=None): n, eta = draw_values([self.n, self.eta], point=point, size=size) size= 1 if size is None else size samples = generate_samples(self._random, n, eta, + dist_shape=self.shape, broadcast_shape=(size,)) return samples diff --git a/pymc3/model.py b/pymc3/model.py index c9ec85fac6..b9efc7e4f2 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -122,6 +122,8 @@ def _get_named_nodes_and_relations(graph, parent, leaf_nodes, except KeyError: node_parents[graph] = set([parent]) node_children[parent].add(graph) + else: + node_parents[graph] = set() # Flag that the leaf node has no children node_children[graph] = set() else: # Intermediate node @@ -132,6 +134,8 @@ def _get_named_nodes_and_relations(graph, parent, leaf_nodes, except KeyError: node_parents[graph] = set([parent]) node_children[parent].add(graph) + else: + node_parents[graph] = set() # The current node will be set as the parent of the next # nodes only if it is a named node parent = graph From a3f7caa76e48f887a363af9f30449c70b3128cc0 Mon Sep 17 00:00:00 2001 From: Luciano Paz Date: Fri, 7 Dec 2018 15:32:39 +0100 Subject: [PATCH 2/7] Fixed the basic defects. Still must do new tests. --- pymc3/distributions/distribution.py | 39 ++++---- pymc3/distributions/mixture.py | 43 +++++++-- pymc3/distributions/multivariate.py | 141 +++++++++++++++++++--------- pymc3/distributions/transforms.py | 2 +- 4 files changed, 155 insertions(+), 70 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 0cf8770deb..ef146461e9 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -225,7 +225,7 @@ def __new__(cls, *args, **kwargs): potencial_parent = cls.get_contexts()[-1] # We have to make sure that the context is a _DrawValuesContext # and not a Model - if isinstance(potencial_parent, cls): + if isinstance(potencial_parent, _DrawValuesContext): instance._parent = potencial_parent else: instance._parent = None @@ -239,7 +239,8 @@ def __init__(self): # another _DrawValuesContext will share the reference to the # drawn_vars dictionary. This means that separate branches # in the nested _DrawValuesContext context tree will see the - # same drawn values + # same drawn values. 
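+        # (Any variable drawn in one branch of the tree is therefore
+        # reused, not redrawn, by every other branch.)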
+ # The drawn_vars keys shall be (RV, size) tuples self.drawn_vars = self.parent.drawn_vars else: self.drawn_vars = dict() @@ -249,8 +250,8 @@ def parent(self): return self._parent -class _DrawValuesOutContext(six.with_metaclass(InitContextMeta, - _DrawValuesContext)): +class _DrawValuesContextBlocker(six.with_metaclass(InitContextMeta, + _DrawValuesContext)): """ Context manager that starts a new drawn variables context disregarding all parent contexts. This can be used inside a random method to ensure that @@ -258,7 +259,7 @@ class _DrawValuesOutContext(six.with_metaclass(InitContextMeta, """ def __new__(cls, *args, **kwargs): # resolves the parent instance - instance = super(_DrawValuesOutContext, cls).__new__(cls) + instance = super(_DrawValuesContextBlocker, cls).__new__(cls) instance._parent = None return instance @@ -309,14 +310,14 @@ def draw_values(params, point=None, size=None): continue name = getattr(p, 'name', None) - if p in drawn: + if (p, size) in drawn: # param was drawn in related contexts - v = drawn[p] + v = drawn[(p, size)] evaluated[i] = v elif name is not None and name in point: # param.name is in point v = point[name] - evaluated[i] = drawn[p] = v + evaluated[i] = drawn[(p, size)] = v else: # param still needs to be drawn symbolic_params.append((i, p)) @@ -351,12 +352,12 @@ def draw_values(params, point=None, size=None): named_nodes_children[k].update(nnc[k]) # Init givens and the stack of nodes to try to `_draw_value` from - givens = {p.name: (p, v) for p, v in drawn.items() + givens = {p.name: (p, v) for (p, size), v in drawn.items() if getattr(p, 'name', None) is not None} stack = list(leaf_nodes.values()) # A queue would be more appropriate while stack: next_ = stack.pop(0) - if next_ in drawn: + if (next_, size) in drawn: # If the node already has a givens value, skip it continue elif isinstance(next_, (tt.TensorConstant, @@ -385,14 +386,14 @@ def draw_values(params, point=None, size=None): givens=temp_givens, size=size) givens[next_.name] = (next_, value) - drawn[next_] = value + drawn[(next_, size)] = value except theano.gof.fg.MissingInputError: # The node failed, so we must add the node's parents to # the stack of nodes to try to draw from. We exclude the # nodes in the `params` list. 
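+                # Note that the `drawn` cache is keyed by (variable, size)
+                # tuples, so membership checks must use the same keys.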
stack.extend([node for node in named_nodes_parents[next_] if node is not None and - node.name not in drawn and + (node, size) not in drawn and node not in params]) # the below makes sure the graph is evaluated in order @@ -407,15 +408,15 @@ def draw_values(params, point=None, size=None): missing_inputs = set() for param_idx in to_eval: param = params[param_idx] - if param in drawn: - evaluated[param_idx] = drawn[param] + if (param, size) in drawn: + evaluated[param_idx] = drawn[(param, size)] else: try: # might evaluate in a bad order, value = _draw_value(param, point=point, givens=givens.values(), size=size) - evaluated[param_idx] = drawn[param] = value + evaluated[param_idx] = drawn[(param, size)] = value givens[param.name] = (param, value) except theano.gof.fg.MissingInputError: missing_inputs.add(param_idx) @@ -496,8 +497,11 @@ def _draw_value(param, point=None, givens=None, size=None): # reset shape to account for shape changes # with theano.shared inputs dist_tmp.shape = np.array([]) - val = np.atleast_1d(dist_tmp.random(point=point, - size=None)) + # We want to draw values to infer the dist_shape, + # we don't want to store these drawn values to the context + with _DrawValuesContextBlocker(): + val = np.atleast_1d(dist_tmp.random(point=point, + size=None)) # Sometimes point may change the size of val but not the # distribution's shape if point and size is not None: @@ -607,7 +611,6 @@ def generate_samples(generator, *args, **kwargs): dist_shape = to_tuple(dist_shape) broadcast_shape = to_tuple(broadcast_shape) size_tup = to_tuple(size) - print(broadcast_shape, type(broadcast_shape)) # All inputs are scalars, end up size (size_tup, dist_shape) if broadcast_shape in {(), (0,), (1,)}: diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index d6ff5e5c6d..949fcdb157 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -6,7 +6,7 @@ from .dist_math import bound, random_choice from .distribution import (Discrete, Distribution, draw_values, generate_samples, _DrawValuesContext, - _DrawValuesOutContext) + _DrawValuesContextBlocker) from .continuous import get_tau_sd, Normal @@ -153,23 +153,36 @@ def logp(self, value): broadcast_conditions=False) def random(self, point=None, size=None): + if size is not None: + if not isinstance(size, tuple): + try: + size = tuple(size) + except TypeError: + size = (size,) with _DrawValuesContext() as draw_context: # We first need to check w and comp_tmp shapes and re compute size w = draw_values([self.w], point=point, size=size)[0] - with _DrawValuesOutContext(): + with _DrawValuesContextBlocker(): # We don't want to store the values drawn here in the context # because they wont have the correct size comp_tmp = self._comp_samples(point=point, size=None) - param_shape = np.broadcast(w, + + # When size is not None, it's hard to tell the w parameter shape + if size is not None: + w_shape = w.shape[len(size):] + else: + w_shape = w.shape + + # Try to determine parameter shape and distshape + param_shape = np.broadcast(np.empty(w_shape), comp_tmp).shape if np.asarray(self.shape).size != 0: distshape = np.broadcast(np.empty(self.shape), np.empty(param_shape[:-1])).shape else: distshape = param_shape[:-1] - w = np.broadcast_to(w, distshape + (param_shape[-1],)) - if size is not None and not isinstance(size, tuple): - size = tuple(size) + + # When size is not None, it's hard to broadcast w correctly if size is not None: if size == distshape: size = None @@ -180,9 +193,23 @@ def random(self, point=None, 
size=None): else: _size = np.prod(size) else: - _size = 1 - output_size = _size * np.prod(distshape) * param_shape[-1] + _size = None + if size is not None: + # To allow w to broadcast, we insert new axis in between the "size" + # axis and the "mixture" axis + _w = w[(slice(None),) * len(size) + # Index the size axis + (np.newaxis,) * len(distshape) + # Add new axis for the distshape + (slice(None),)] # Close with the slice of mixture components + w = np.broadcast_to(_w, size + distshape + (param_shape[-1],)) + else: + w = np.broadcast_to(w, distshape + (param_shape[-1],)) + if _size is not None: + output_size = _size * np.prod(distshape) * param_shape[-1] + else: + output_size = np.prod(distshape) * param_shape[-1] underlying_size = output_size // np.prod(comp_tmp.shape) + if underlying_size == 1 and _size is None: + underlying_size = None # Normalize inputs w = np.reshape(w, (-1, w.shape[-1])) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 90b9986251..58e9dbca8b 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -227,12 +227,13 @@ def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, def random(self, point=None, size=None): if size is None: - size = [] + size = tuple() else: - try: - size = list(size) - except TypeError: - size = [size] + if not isinstance(size, tuple): + try: + size = tuple(size) + except TypeError: + size = (size,) if self._cov_type == 'cov': mu, cov = draw_values([self.mu, self.cov], point=point, size=size) @@ -247,13 +248,27 @@ def random(self, point=None, size=None): return np.nan * np.zeros(size) return dist.rvs(size) elif self._cov_type == 'chol': - mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size) - if mu.shape[-1] != chol[0].shape[-1]: + mu, chol = draw_values([self.mu, self.chol_cov], + point=point, size=size) + if size and mu.ndim == len(size): + mu = mu[..., np.newaxis] + if mu.shape[-1] != chol.shape[-1] and mu.shape[-1] != 1: raise ValueError("Shapes for mu and chol don't match") - - size.append(mu.shape[-1]) - standard_normal = np.random.standard_normal(size) - return mu + np.dot(standard_normal, chol.T) + broadcast_shape = ( + np.broadcast(np.empty(mu.shape[:-1]), + np.empty(chol.shape[:-2])).shape + ) + + mu = np.broadcast_to(mu, broadcast_shape + (chol.shape[-1],)) + chol = np.broadcast_to(chol, broadcast_shape + chol.shape[-2:]) + # If mu and chol were fixed by the point, only the standard normal + # should change + if mu.shape[:len(size)] != size: + std_norm_shape = size + mu.shape + else: + std_norm_shape = mu.shape + standard_normal = np.random.standard_normal(std_norm_shape) + return mu + np.tensordot(standard_normal, chol, axes=[[-1], [-2]]) else: mu, tau = draw_values([self.mu, self.tau], point=point, size=size) if mu.shape[-1] != tau[0].shape[-1]: @@ -998,47 +1013,88 @@ def logp(self, x): return norm + logp_lkj + logp_sd + det_invjac - def _random(self, n, eta, size=None): - size = size if isinstance(size, tuple) else (size,) + def _random(self, n, eta, size=1): + eta_sample_shape = (size,) + eta.shape + P = np.eye(n) * np.ones(eta_sample_shape + (n, n)) # original implementation in R see: # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r beta = eta - 1. + n/2. - r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=size) - 1. - P = np.eye(n)[:, :, np.newaxis] * np.ones(size) - P[0, 1] = r12 - P[1, 1] = np.sqrt(1. - r12**2) + r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1. 
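+        # P is filled column by column as an upper triangular factor with
+        # unit-norm columns, so that C = P.T P is a correlation matrix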
+        P[..., 0, 1] = r12
+        P[..., 1, 1] = np.sqrt(1. - r12**2)
         for mp1 in range(2, n):
             beta -= 0.5
-            y = stats.beta.rvs(a=mp1 / 2., b=beta, size=size)
-            z = stats.norm.rvs(loc=0, scale=1, size=(mp1, ) + size)
+            y = stats.beta.rvs(a=mp1 / 2., b=beta, size=eta_sample_shape)
+            z = stats.norm.rvs(loc=0, scale=1, size=(mp1, ) + eta_sample_shape)
             z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
-            P[0:mp1, mp1] = np.sqrt(y) * z
-            P[mp1, mp1] = np.sqrt(1. - y)
-        C = np.einsum('ji...,jk...->...ik', P, P)
-        D = np.atleast_1d(self.sd_dist.random(size=size))
-        if D.shape == size:
-            D = np.array([D] + [self.sd_dist.random(size=size)
-                                for _ in range(C.shape[-1] - 1)])
-            D = np.moveaxis(np.array(D), 0, D.ndim - 1)
-        elif D.shape[-1] == 1:
-            D = [D] + [self.sd_dist.random(size=size)
-                       for _ in range(C.shape[-1] - 1)]
-            D = np.concatenate(D, axis=-1)
-        try:
-            C *= D[..., :, np.newaxis] * D[..., np.newaxis, :]
-        except TypeError:
-            pass
-        chol = np.linalg.cholesky(C)
+            P[..., 0:mp1, mp1] = np.sqrt(y) * z
+            P[..., mp1, mp1] = np.sqrt(1. - y)
+        C = np.einsum('...ji,...jk->...ik', P, P)
+        # Draw the standard deviations from sd_dist and broadcast them to a
+        # shape that can scale the sampled correlation matrices in C
+        D = np.atleast_1d(self.sd_dist.random(size=P.shape[:-2]))
+        if D.shape in [tuple(), (1,)]:
+            D = self.sd_dist.random(size=P.shape[:-1])
+        elif D.ndim < C.ndim - 1:
+            D = [D] + [self.sd_dist.random(size=P.shape[:-2])
+                       for _ in range(n - 1)]
+            D = np.moveaxis(np.array(D), 0, C.ndim - 2)
+        elif D.ndim == C.ndim - 1:
+            if D.shape[-1] == 1:
+                D = [D] + [self.sd_dist.random(size=P.shape[:-2])
+                           for _ in range(n - 1)]
+                D = np.concatenate(D, axis=-1)
+            elif D.shape[-1] != n:
+                raise ValueError('The samples drawn from the supplied '
+                                 'sd_dist.random have the wrong size. '
+                                 'Expected {} but got {} instead.'.
+                                 format(n, D.shape[-1]))
+        else:
+            raise ValueError('Supplied sd_dist.random generates samples with '
+                             'too many dimensions. It must yield samples '
+                             'with 0 or 1 dimensions. Got {} instead'. 
+ format(D.ndim - C.ndim - 2)) + C *= D[..., :, np.newaxis] * D[..., np.newaxis, :] tril_idx = np.tril_indices(n, k=0) - return chol[..., tril_idx[0], tril_idx[1]] + return np.linalg.cholesky(C)[..., tril_idx[0], tril_idx[1]] def random(self, point=None, size=None): + # Get parameters and broadcast them n, eta = draw_values([self.n, self.eta], point=point, size=size) - size= 1 if size is None else size - samples = generate_samples(self._random, n, eta, - dist_shape=self.shape, - broadcast_shape=self.shape, - size=size) + broadcast_shape = np.broadcast(n, eta).shape + # We can only handle cov matrices with a constant n per random call + n = np.unique(n) + if len(n) > 1: + raise RuntimeError('Varying n is not supported for LKJCholeskyCov') + n = int(n[0]) + dist_shape = ((n * (n + 1)) // 2,) + # We make sure that eta and the drawn n get their shapes broadcasted + eta = np.broadcast_to(eta, broadcast_shape) + # We change the size of the draw depending on the broadcast shape + sample_shape = broadcast_shape + dist_shape + if size is not None: + if not isinstance(size, tuple): + try: + size = tuple(size) + except TypeError: + size = (size,) + if size == sample_shape: + size = None + elif size == broadcast_shape: + size = None + elif size[-len(sample_shape):] == sample_shape: + size = size[:len(size) - len(sample_shape)] + elif size[-len(broadcast_shape):] == broadcast_shape: + size = size[:len(size) - len(broadcast_shape)] + # We will always provide _random with an integer size and then reshape + # the output to get the correct size + if size is not None: + _size = np.prod(size) + else: + _size = 1 + samples = self._random(n, eta, size=_size) + if size is None: + samples = samples[0] + else: + samples = np.reshape(samples, size + sample_shape) return samples @@ -1140,7 +1196,6 @@ def random(self, point=None, size=None): n, eta = draw_values([self.n, self.eta], point=point, size=size) size= 1 if size is None else size samples = generate_samples(self._random, n, eta, - dist_shape=self.shape, broadcast_shape=(size,)) return samples diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 682b3426d7..4527fab09d 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -448,7 +448,7 @@ def forward(self, y): return tt.advanced_set_subtensor1(y, tt.log(y[self.diag_idxs]), self.diag_idxs) def forward_val(self, y, point=None): - y[self.diag_idxs] = np.log(y[self.diag_idxs]) + y[..., self.diag_idxs] = np.log(y[..., self.diag_idxs]) return y def jacobian_det(self, y): From 7867b16954f390b80845277fc663c966fa48b062 Mon Sep 17 00:00:00 2001 From: Luciano Paz Date: Fri, 7 Dec 2018 15:58:02 +0100 Subject: [PATCH 3/7] Added test for sampling prior and posterior predictives from a mixture based on issue #3270 --- pymc3/tests/test_mixture.py | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py index 7ee53a8753..1d382168fd 100644 --- a/pymc3/tests/test_mixture.py +++ b/pymc3/tests/test_mixture.py @@ -2,6 +2,7 @@ from numpy.testing import assert_allclose from .helpers import SeededTest +import pymc3 as pm from pymc3 import Dirichlet, Gamma, Normal, Lognormal, Poisson, Exponential, \ Mixture, NormalMixture, MvNormal, sample, Metropolis, Model import scipy.stats as st @@ -231,3 +232,53 @@ def mixmixlogp(value, point): assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point)) assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point)) + + def 
test_sample_prior_and_posterior(self): + def build_toy_dataset(N, K): + pi = np.array([0.2, 0.5, 0.3]) + mus = [[1, 1], [-1, -1], [2, -2]] + stds = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.3]] + x = np.zeros((N, 2), dtype=np.float32) + y = np.zeros((N,), dtype=np.int) + for n in range(N): + k = np.argmax(np.random.multinomial(1, pi)) + x[n, :] = np.random.multivariate_normal(mus[k], + np.diag(stds[k])) + y[n] = k + return x, y + + N = 100 # number of data points + D = 2 # dimensionality of data + + X, y = build_toy_dataset(N, 3) + + K = 3 + with pm.Model() as model: + pi = pm.Dirichlet('pi', np.ones(K)) + + comp_dist = [] + mu = [] + packed_chol = [] + chol = [] + for i in range(K): + mu.append(pm.Normal('mu%i' % i, 0, 10, shape=2)) + packed_chol.append( + pm.LKJCholeskyCov('chol_cov_%i' % i, + eta=2, + n=2, + sd_dist=pm.HalfNormal.dist(2.5)) + ) + chol.append(pm.expand_packed_triangular(2, packed_chol[i], + lower=True)) + comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i])) + + pm.Mixture('x_obs', pi, comp_dist, observed=X) + with model: + trace = pm.sample(30, tune=10, chains=1) + + n_samples = 20 + with model: + ppc = pm.sample_posterior_predictive(trace, n_samples) + prior = pm.sample_prior_predictive(samples=n_samples) + assert ppc['x_obs'].shape == (n_samples,) + X.shape + assert prior['x_obs'].shape == (n_samples, D) From 08323bc3023ba1c1c16819725cd1d57fed295e97 Mon Sep 17 00:00:00 2001 From: Luciano Paz Date: Mon, 10 Dec 2018 13:35:04 +0100 Subject: [PATCH 4/7] Fixed bugs and failed tests. Still must write a test for LKJCholeskyCov.random method. --- pymc3/distributions/distribution.py | 28 +++-- pymc3/distributions/mixture.py | 152 +++++++++++++++++++--------- pymc3/distributions/multivariate.py | 14 +-- pymc3/tests/test_mixture.py | 22 ++-- 4 files changed, 146 insertions(+), 70 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index ef146461e9..073b054b30 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -518,20 +518,34 @@ def _draw_value(param, point=None, givens=None, size=None): variables, values = list(zip(*givens)) else: variables = values = [] - func = _compile_theano_function(param, variables) + # We only truly care if the ancestors of param that were given + # value have the matching dshape and val.shape + param_ancestors = \ + set(theano.gof.graph.ancestors([param], + blockers=list(variables)) + ) + inputs = [(var, val) for var, val in + zip(variables, values) + if var in param_ancestors] + if inputs: + input_vars, input_vals = list(zip(*inputs)) + else: + input_vars = [] + input_vals = [] + func = _compile_theano_function(param, input_vars) if size is not None: size = np.atleast_1d(size) dshaped_variables = all((hasattr(var, 'dshape') - for var in variables)) + for var in input_vars)) if (values and dshaped_variables and not all(var.dshape == getattr(val, 'shape', tuple()) - for var, val in zip(variables, values))): - output = np.array([func(*v) for v in zip(*values)]) + for var, val in zip(input_vars, input_vals))): + output = np.array([func(*v) for v in zip(*input_vals)]) elif (size is not None and any((val.ndim > var.ndim) - for var, val in zip(variables, values))): - output = np.array([func(*v) for v in zip(*values)]) + for var, val in zip(input_vars, input_vals))): + output = np.array([func(*v) for v in zip(*input_vals)]) else: - output = func(*values) + output = func(*input_vals) return output raise ValueError('Unexpected type in draw_value: %s' % type(param)) diff --git 
a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index 949fcdb157..4719427a17 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -6,7 +6,7 @@
 from .dist_math import bound, random_choice
 from .distribution import (Discrete, Distribution, draw_values,
                            generate_samples, _DrawValuesContext,
-                           _DrawValuesContextBlocker)
+                           _DrawValuesContextBlocker, to_tuple)
 from .continuous import get_tau_sd, Normal
 
 
@@ -104,6 +104,36 @@ def __init__(self, w, comp_dists, *args, **kwargs):
         super(Mixture, self).__init__(shape, dtype, defaults=defaults,
                                       *args, **kwargs)
 
+    @property
+    def comp_dists(self):
+        return self._comp_dists
+
+    @comp_dists.setter
+    def comp_dists(self, _comp_dists):
+        self._comp_dists = _comp_dists
+        # Tests if the comp_dists can call random with non None size
+        test_draw = None
+        try:
+            test_draw = self.comp_dists.random(size=20)
+        except TypeError:
+            # The comp_dists cannot call random with non None size or
+            # without knowledge of the point so we assume that we will
+            # have to iterate calls to random to get the correct size
+            pass
+        except AttributeError:
+            try:
+                test_draw = np.array([comp_dist.random(size=20)
+                                      for comp_dist in self.comp_dists])
+            except TypeError:
+                # The comp_dists cannot call random with non None size or
+                # without knowledge of the point so we assume that we will
+                # have to iterate calls to random to get the correct size
+                pass
+        if test_draw is None:
+            self._comp_dists_vect = False
+        else:
+            self._comp_dists_vect = True
+
     def _comp_logp(self, value):
         comp_dists = self.comp_dists
 
@@ -133,12 +163,28 @@ def _comp_modes(self):
                            axis=1))
 
     def _comp_samples(self, point=None, size=None):
-        try:
-            return self.comp_dists.random(point=point, size=size)
-        except AttributeError:
-            samples = np.array([comp_dist.random(point=point, size=size)
-                                for comp_dist in self.comp_dists])
-            samples = np.moveaxis(samples, 0, samples.ndim - 1)
+        if self._comp_dists_vect or size is None:
+            try:
+                return self.comp_dists.random(point=point, size=size)
+            except AttributeError:
+                samples = np.array([comp_dist.random(point=point, size=size)
+                                    for comp_dist in self.comp_dists])
+                samples = np.moveaxis(samples, 0, samples.ndim - 1)
+        else:
+            # We must iterate the calls to random manually
+            size = to_tuple(size)
+            _size = int(np.prod(size))
+            try:
+                samples = np.array([self.comp_dists.random(point=point,
+                                                           size=None)
+                                    for _ in range(_size)])
+                samples = np.reshape(samples, size + samples.shape[1:])
+            except AttributeError:
+                samples = np.array([[comp_dist.random(point=point, size=None)
+                                     for _ in range(_size)]
+                                    for comp_dist in self.comp_dists])
+                samples = np.moveaxis(samples, 0, samples.ndim - 1)
+                samples = np.reshape(samples, size + samples.shape[1:])
 
         if samples.shape[-1] == 1:
             return samples[..., 0]
@@ -153,12 +199,9 @@ def logp(self, value):
                            broadcast_conditions=False)
 
     def random(self, point=None, size=None):
-        if size is not None:
-            if not isinstance(size, tuple):
-                try:
-                    size = tuple(size)
-                except TypeError:
-                    size = (size,)
+        # Convert size to tuple
+        size = to_tuple(size)
+        # Draw mixture weights and a sample from each mixture to infer shape
         with _DrawValuesContext() as draw_context:
             # We first need to check w and comp_tmp shapes and re compute size
             w = draw_values([self.w], point=point, size=size)[0]
@@ -168,51 +211,65 @@ def random(self, point=None, size=None):
                 comp_tmp = self._comp_samples(point=point, size=None)
 
             # When size is not None, it's hard to tell the w parameter shape
-            if size is not None:
+            if size is not None and 
w.shape[:len(size)] == size: w_shape = w.shape[len(size):] else: w_shape = w.shape - # Try to determine parameter shape and distshape + # Try to determine parameter shape and dist_shape param_shape = np.broadcast(np.empty(w_shape), comp_tmp).shape if np.asarray(self.shape).size != 0: - distshape = np.broadcast(np.empty(self.shape), - np.empty(param_shape[:-1])).shape + dist_shape = np.broadcast(np.empty(self.shape), + np.empty(param_shape[:-1])).shape else: - distshape = param_shape[:-1] + dist_shape = param_shape[:-1] - # When size is not None, it's hard to broadcast w correctly + # When size is not None, maybe dist_shape partially overlaps with size if size is not None: - if size == distshape: + if size == dist_shape: size = None - _size = -1 - elif size[-len(distshape):] == distshape: - size = size[:len(size) - len(distshape)] - _size = np.prod(size) - else: - _size = np.prod(size) - else: + elif size[-len(dist_shape):] == dist_shape: + size = size[:len(size) - len(dist_shape)] + + # We get an integer _size instead of a tuple size for drawing the + # mixture, then we just reshape the output + if size is None: _size = None - if size is not None: - # To allow w to broadcast, we insert new axis in between the "size" - # axis and the "mixture" axis - _w = w[(slice(None),) * len(size) + # Index the size axis - (np.newaxis,) * len(distshape) + # Add new axis for the distshape - (slice(None),)] # Close with the slice of mixture components - w = np.broadcast_to(_w, size + distshape + (param_shape[-1],)) else: - w = np.broadcast_to(w, distshape + (param_shape[-1],)) - if _size is not None: - output_size = _size * np.prod(distshape) * param_shape[-1] + _size = int(np.prod(size)) + + # Now we must broadcast w to the shape that considers size, dist_shape + # and param_shape. However, we must take care with the cases in which + # dist_shape and param_shape overlap + if size is not None and w.shape[:len(size)] == size: + if w.shape[:len(size + dist_shape)] != (size + dist_shape): + # To allow w to broadcast, we insert new axis in between the + # "size" axis and the "mixture" axis + _w = w[(slice(None),) * len(size) + # Index the size axis + (np.newaxis,) * len(dist_shape) + # Add new axis for the dist_shape + (slice(None),)] # Close with the slice of mixture components + w = np.broadcast_to(_w, size + dist_shape + (param_shape[-1],)) + elif size is not None: + w = np.broadcast_to(w, size + dist_shape + (param_shape[-1],)) else: - output_size = np.prod(distshape) * param_shape[-1] - underlying_size = output_size // np.prod(comp_tmp.shape) - if underlying_size == 1 and _size is None: - underlying_size = None + w = np.broadcast_to(w, dist_shape + (param_shape[-1],)) - # Normalize inputs + # Compute the total size of the mixture's random call with size + if _size is not None: + output_size = int(_size * np.prod(dist_shape) * param_shape[-1]) + else: + output_size = int(np.prod(dist_shape) * param_shape[-1]) + # Get the size we need for the mixture's random call + mixture_size = int(output_size // np.prod(comp_tmp.shape)) + if mixture_size == 1 and _size is None: + mixture_size = None + + # Semiflatten the mixture weights. 
The last axis is the number of
+        # mixture components, and the rest is all about size,
+        # dist_shape and broadcasting
         w = np.reshape(w, (-1, w.shape[-1]))
+        # Normalize mixture weights
         w = w / w.sum(axis=-1, keepdims=True)
 
         w_samples = generate_samples(random_choice,
                                      p=w,
                                      broadcast_shape=w.shape[:-1] or (1,),
                                      dist_shape=w.shape[:-1] or (1,),
                                      size=size)
+        # Sample from the mixture
         with draw_context:
             mixed_samples = self._comp_samples(point=point,
-                                               size=underlying_size)
+                                               size=mixture_size)
         w_samples = w_samples.flatten()
+        # Semiflatten the mixture to be able to zip it with w_samples
         mixed_samples = np.reshape(mixed_samples, (-1, comp_tmp.shape[-1]))
+        # Select the samples from the mixture
         samples = np.array([mixed[choice] for choice, mixed in
                             zip(w_samples, mixed_samples)])
+        # Reshape the samples to the correct output shape
         if size is None:
-            samples = np.reshape(samples, distshape)
+            samples = np.reshape(samples, dist_shape)
         else:
-            samples = np.reshape(samples, size + distshape)
-
+            samples = np.reshape(samples, size + dist_shape)
         return samples
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 58e9dbca8b..569e3c1d85 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -244,13 +244,13 @@ def random(self, point=None, size=None):
             dist = stats.multivariate_normal(
                 mean=mu, cov=cov, allow_singular=True)
         except ValueError:
-            size.append(mu.shape[-1])
+            size += (mu.shape[-1],)
             return np.nan * np.zeros(size)
         return dist.rvs(size)
     elif self._cov_type == 'chol':
         mu, chol = draw_values([self.mu, self.chol_cov],
                                point=point, size=size)
-        if size and mu.ndim == len(size):
+        if size and mu.ndim == len(size) and mu.shape == size:
             mu = mu[..., np.newaxis]
         if mu.shape[-1] != chol.shape[-1] and mu.shape[-1] != 1:
             raise ValueError("Shapes for mu and chol don't match")
@@ -268,13 +268,13 @@ def random(self, point=None, size=None):
         else:
             std_norm_shape = mu.shape
         standard_normal = np.random.standard_normal(std_norm_shape)
-        return mu + np.tensordot(standard_normal, chol, axes=[[-1], [-2]])
+        return mu + np.tensordot(standard_normal, chol, axes=[[-1], [-1]])
     else:
         mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
         if mu.shape[-1] != tau[0].shape[-1]:
             raise ValueError("Shapes for mu and tau don't match")
 
-        size.append(mu.shape[-1])
+        size += (mu.shape[-1],)
         try:
             chol = linalg.cholesky(tau, lower=True)
         except linalg.LinAlgError:
@@ -962,7 +962,7 @@ def __init__(self, eta, n, sd_dist, *args, **kwargs):
         self.n = n
         self.eta = eta
 
-        if 'transform' in kwargs:
+        if 'transform' in kwargs and kwargs['transform'] is not None:
             raise ValueError('Invalid parameter: transform.')
         if 'shape' in kwargs:
             raise ValueError('Invalid parameter: shape.')
@@ -1025,9 +1025,9 @@ def _random(self, n, eta, size=1):
         for mp1 in range(2, n):
             beta -= 0.5
             y = stats.beta.rvs(a=mp1 / 2., b=beta, size=eta_sample_shape)
-            z = stats.norm.rvs(loc=0, scale=1, size=(mp1, ) + eta_sample_shape)
+            z = stats.norm.rvs(loc=0, scale=1, size=eta_sample_shape + (mp1,))
             z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
-            P[..., 0:mp1, mp1] = np.sqrt(y) * z
+            P[..., 0:mp1, mp1] = np.sqrt(y[..., np.newaxis]) * z
             P[..., mp1, mp1] = np.sqrt(1. 
- y) C = np.einsum('...ji,...jk->...ik', P, P) D = np.atleast_1d(self.sd_dist.random(size=P.shape[:-2])) diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py index 1d382168fd..010532c800 100644 --- a/pymc3/tests/test_mixture.py +++ b/pymc3/tests/test_mixture.py @@ -236,9 +236,9 @@ def mixmixlogp(value, point): def test_sample_prior_and_posterior(self): def build_toy_dataset(N, K): pi = np.array([0.2, 0.5, 0.3]) - mus = [[1, 1], [-1, -1], [2, -2]] - stds = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.3]] - x = np.zeros((N, 2), dtype=np.float32) + mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]] + stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]] + x = np.zeros((N, 3), dtype=np.float32) y = np.zeros((N,), dtype=np.int) for n in range(N): k = np.argmax(np.random.multinomial(1, pi)) @@ -248,11 +248,11 @@ def build_toy_dataset(N, K): return x, y N = 100 # number of data points - D = 2 # dimensionality of data + K = 3 # number of mixture components + D = 3 # dimensionality of the data - X, y = build_toy_dataset(N, 3) + X, y = build_toy_dataset(N, K) - K = 3 with pm.Model() as model: pi = pm.Dirichlet('pi', np.ones(K)) @@ -261,14 +261,14 @@ def build_toy_dataset(N, K): packed_chol = [] chol = [] for i in range(K): - mu.append(pm.Normal('mu%i' % i, 0, 10, shape=2)) + mu.append(pm.Normal('mu%i' % i, 0, 10, shape=D)) packed_chol.append( pm.LKJCholeskyCov('chol_cov_%i' % i, eta=2, - n=2, + n=D, sd_dist=pm.HalfNormal.dist(2.5)) ) - chol.append(pm.expand_packed_triangular(2, packed_chol[i], + chol.append(pm.expand_packed_triangular(D, packed_chol[i], lower=True)) comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i])) @@ -281,4 +281,6 @@ def build_toy_dataset(N, K): ppc = pm.sample_posterior_predictive(trace, n_samples) prior = pm.sample_prior_predictive(samples=n_samples) assert ppc['x_obs'].shape == (n_samples,) + X.shape - assert prior['x_obs'].shape == (n_samples, D) + assert prior['x_obs'].shape == (n_samples,) + X.shape + assert prior['mu0'].shape == (n_samples, D) + assert prior['chol_cov_0'].shape == (n_samples, D * (D + 1) // 2) From 0ace3fa562a1eaf981a0e4e08f5b8ad592eb854b Mon Sep 17 00:00:00 2001 From: lucianopaz Date: Mon, 10 Dec 2018 20:56:23 +0100 Subject: [PATCH 5/7] Fixed failed test --- pymc3/distributions/mixture.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index 4719427a17..c6db4cfe7c 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -112,27 +112,24 @@ def comp_dists(self): def comp_dists(self, _comp_dists): self._comp_dists = _comp_dists # Tests if the comp_dists can call random with non None size - test_draw = None - try: - test_draw = self.comp_dists.random(size=20) - except TypeError: - # The comp_dists cannot call random with non None size or - # without knowledge of the point so we assume that we will - # have to iterate calls to random to get the correct size - pass - except AttributeError: + if isinstance(self.comp_dists, (list, tuple)): try: - test_draw = np.array([comp_dist.random(size=20) - for comp_dist in self.comp_dists]) - except TypeError: + [comp_dist.random(size=23) for comp_dist in self.comp_dists] + self._comp_dists_vect = True + except Exception: # The comp_dists cannot call random with non None size or # without knowledge of the point so we assume that we will # have to iterate calls to random to get the correct size - pass - if test_draw is None: - self._comp_dists_vect = False + 
self._comp_dists_vect = False else: - self._comp_dists_vect = True + try: + self.comp_dists.random(size=23) + self._comp_dists_vect = True + except Exception: + # The comp_dists cannot call random with non None size or + # without knowledge of the point so we assume that we will + # have to iterate calls to random to get the correct size + self._comp_dists_vect = False def _comp_logp(self, value): comp_dists = self.comp_dists From 029474fc8a254831449d9883d150e70f2a30cdc6 Mon Sep 17 00:00:00 2001 From: Luciano Paz Date: Tue, 11 Dec 2018 11:31:17 +0100 Subject: [PATCH 6/7] Added tests for DrawValuesContext and also for the context blocker that was introduced in this PR. --- pymc3/distributions/mixture.py | 38 +++++++++++---------- pymc3/tests/test_distributions_random.py | 42 +++++++++++++++++++++++- 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index c6db4cfe7c..a681cb1699 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -112,24 +112,26 @@ def comp_dists(self): def comp_dists(self, _comp_dists): self._comp_dists = _comp_dists # Tests if the comp_dists can call random with non None size - if isinstance(self.comp_dists, (list, tuple)): - try: - [comp_dist.random(size=23) for comp_dist in self.comp_dists] - self._comp_dists_vect = True - except Exception: - # The comp_dists cannot call random with non None size or - # without knowledge of the point so we assume that we will - # have to iterate calls to random to get the correct size - self._comp_dists_vect = False - else: - try: - self.comp_dists.random(size=23) - self._comp_dists_vect = True - except Exception: - # The comp_dists cannot call random with non None size or - # without knowledge of the point so we assume that we will - # have to iterate calls to random to get the correct size - self._comp_dists_vect = False + with _DrawValuesContextBlocker(): + if isinstance(self.comp_dists, (list, tuple)): + try: + [comp_dist.random(size=23) + for comp_dist in self.comp_dists] + self._comp_dists_vect = True + except Exception: + # The comp_dists cannot call random with non None size or + # without knowledge of the point so we assume that we will + # have to iterate calls to random to get the correct size + self._comp_dists_vect = False + else: + try: + self.comp_dists.random(size=23) + self._comp_dists_vect = True + except Exception: + # The comp_dists cannot call random with non None size or + # without knowledge of the point so we assume that we will + # have to iterate calls to random to get the correct size + self._comp_dists_vect = False def _comp_logp(self, value): comp_dists = self.comp_dists diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 0e354536c6..39218f7138 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -10,7 +10,9 @@ import theano import pymc3 as pm -from pymc3.distributions.distribution import draw_values +from pymc3.distributions.distribution import (draw_values, + _DrawValuesContext, + _DrawValuesContextBlocker) from .helpers import SeededTest from .test_distributions import ( build_model, Domain, product, R, Rplus, Rplusbig, Runif, Rplusdunif, @@ -116,6 +118,44 @@ def test_random_sample_returns_nd_array(self): assert isinstance(tau, np.ndarray) +class TestDrawValuesContext(object): + def test_normal_context(self): + with _DrawValuesContext() as context0: + assert context0.parent is None + 
context0.drawn_vars['root_test'] = 1 + with _DrawValuesContext() as context1: + assert id(context1.drawn_vars) == id(context0.drawn_vars) + assert context1.parent == context0 + with _DrawValuesContext() as context2: + assert id(context2.drawn_vars) == id(context0.drawn_vars) + assert context2.parent == context1 + context2.drawn_vars['leaf_test'] = 2 + assert context1.drawn_vars['leaf_test'] == 2 + context1.drawn_vars['root_test'] = 3 + assert context0.drawn_vars['root_test'] == 3 + assert context0.drawn_vars['leaf_test'] == 2 + + def test_blocking_context(self): + with _DrawValuesContext() as context0: + assert context0.parent is None + context0.drawn_vars['root_test'] = 1 + with _DrawValuesContext() as context1: + assert id(context1.drawn_vars) == id(context0.drawn_vars) + assert context1.parent == context0 + with _DrawValuesContextBlocker() as blocker: + assert id(blocker.drawn_vars) != id(context0.drawn_vars) + assert blocker.parent is None + blocker.drawn_vars['root_test'] = 2 + with _DrawValuesContext() as context2: + assert id(context2.drawn_vars) == id(blocker.drawn_vars) + assert context2.parent == blocker + context2.drawn_vars['root_test'] = 3 + context2.drawn_vars['leaf_test'] = 4 + assert blocker.drawn_vars['root_test'] == 3 + assert 'leaf_test' not in context1.drawn_vars + assert context0.drawn_vars['root_test'] == 1 + + class BaseTestCases(object): class BaseTestCase(SeededTest): shape = 5 From 4adddfbb515fc97182039af8327c89b835c0b305 Mon Sep 17 00:00:00 2001 From: lucianopaz Date: Thu, 3 Jan 2019 11:11:49 +0100 Subject: [PATCH 7/7] Changed six metaclass to metaclass keyword --- pymc3/distributions/distribution.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 916794a713..248720101d 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -246,8 +246,7 @@ def parent(self): return self._parent -class _DrawValuesContextBlocker(six.with_metaclass(InitContextMeta, - _DrawValuesContext)): +class _DrawValuesContextBlocker(_DrawValuesContext, metaclass=InitContextMeta): """ Context manager that starts a new drawn variables context disregarding all parent contexts. This can be used inside a random method to ensure that