diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index dd655ada61..248720101d 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -221,7 +221,7 @@ def __new__(cls, *args, **kwargs):
             potential_parent = cls.get_contexts()[-1]
             # We have to make sure that the context is a _DrawValuesContext
             # and not a Model
-            if isinstance(potential_parent, cls):
+            if isinstance(potential_parent, _DrawValuesContext):
                 instance._parent = potential_parent
             else:
                 instance._parent = None
@@ -235,7 +235,8 @@ def __init__(self):
             # another _DrawValuesContext will share the reference to the
             # drawn_vars dictionary. This means that separate branches
             # in the nested _DrawValuesContext context tree will see the
-            # same drawn values
+            # same drawn values.
+            # The drawn_vars keys are (RV, size) tuples
            self.drawn_vars = self.parent.drawn_vars
         else:
             self.drawn_vars = dict()
@@ -245,6 +246,22 @@ def parent(self):
         return self._parent
 
 
+class _DrawValuesContextBlocker(_DrawValuesContext, metaclass=InitContextMeta):
+    """
+    Context manager that starts a new drawn variables context, disregarding
+    all parent contexts. This can be used inside a random method to ensure
+    that the drawn values won't be the ones cached by previous calls.
+    """
+    def __new__(cls, *args, **kwargs):
+        # Intentionally ignore any parent context: always start empty
+        instance = super(_DrawValuesContextBlocker, cls).__new__(cls)
+        instance._parent = None
+        return instance
+
+    def __init__(self):
+        self.drawn_vars = dict()
+
+
 def is_fast_drawable(var):
     return isinstance(var, (numbers.Number,
                             np.ndarray,
@@ -288,14 +305,14 @@ def draw_values(params, point=None, size=None):
                 continue
 
             name = getattr(p, 'name', None)
-            if p in drawn:
+            if (p, size) in drawn:
                 # param was drawn in related contexts
-                v = drawn[p]
+                v = drawn[(p, size)]
                 evaluated[i] = v
             elif name is not None and name in point:
                 # param.name is in point
                 v = point[name]
-                evaluated[i] = drawn[p] = v
+                evaluated[i] = drawn[(p, size)] = v
             else:
                 # param still needs to be drawn
                 symbolic_params.append((i, p))
@@ -330,12 +347,12 @@ def draw_values(params, point=None, size=None):
                     named_nodes_children[k].update(nnc[k])
 
     # Init givens and the stack of nodes to try to `_draw_value` from
-    givens = {p.name: (p, v) for p, v in drawn.items()
+    givens = {p.name: (p, v) for (p, size), v in drawn.items()
              if getattr(p, 'name', None) is not None}
    stack = list(leaf_nodes.values())  # A queue would be more appropriate
    while stack:
        next_ = stack.pop(0)
-        if next_ in drawn:
+        if (next_, size) in drawn:
            # If the node already has a givens value, skip it
            continue
        elif isinstance(next_, (tt.TensorConstant,
@@ -364,14 +381,14 @@ def draw_values(params, point=None, size=None):
                                     givens=temp_givens,
                                     size=size)
                 givens[next_.name] = (next_, value)
-                drawn[next_] = value
+                drawn[(next_, size)] = value
             except theano.gof.fg.MissingInputError:
                 # The node failed, so we must add the node's parents to
                 # the stack of nodes to try to draw from. We exclude the
                 # nodes in the `params` list.
                 stack.extend([node for node in named_nodes_parents[next_]
                               if node is not None and
-                              node.name not in drawn and
+                              (node, size) not in drawn and
                               node not in params])
 
     # the below makes sure the graph is evaluated in order
@@ -386,15 +403,15 @@ def draw_values(params, point=None, size=None):
         missing_inputs = set()
         for param_idx in to_eval:
             param = params[param_idx]
-            if param in drawn:
-                evaluated[param_idx] = drawn[param]
+            if (param, size) in drawn:
+                evaluated[param_idx] = drawn[(param, size)]
             else:
                 try:  # might evaluate in a bad order,
                     value = _draw_value(param,
                                         point=point,
                                         givens=givens.values(),
                                         size=size)
-                    evaluated[param_idx] = drawn[param] = value
+                    evaluated[param_idx] = drawn[(param, size)] = value
                     givens[param.name] = (param, value)
                 except theano.gof.fg.MissingInputError:
                     missing_inputs.add(param_idx)
@@ -475,8 +492,11 @@ def _draw_value(param, point=None, givens=None, size=None):
                     # reset shape to account for shape changes
                     # with theano.shared inputs
                     dist_tmp.shape = np.array([])
-                    val = np.atleast_1d(dist_tmp.random(point=point,
-                                                        size=None))
+                    # We want to draw values to infer the dist_shape,
+                    # we don't want to store these drawn values in the context
+                    with _DrawValuesContextBlocker():
+                        val = np.atleast_1d(dist_tmp.random(point=point,
+                                                            size=None))
                     # Sometimes point may change the size of val but not the
                     # distribution's shape
                     if point and size is not None:
@@ -493,20 +513,34 @@ def _draw_value(param, point=None, givens=None, size=None):
             variables, values = list(zip(*givens))
         else:
             variables = values = []
-        func = _compile_theano_function(param, variables)
+        # We only truly care whether the ancestors of param that were given
+        # a value have matching dshape and val.shape
+        param_ancestors = \
+            set(theano.gof.graph.ancestors([param],
+                                           blockers=list(variables))
+                )
+        inputs = [(var, val) for var, val in
+                  zip(variables, values)
+                  if var in param_ancestors]
+        if inputs:
+            input_vars, input_vals = list(zip(*inputs))
+        else:
+            input_vars = []
+            input_vals = []
+        func = _compile_theano_function(param, input_vars)
         if size is not None:
             size = np.atleast_1d(size)
         dshaped_variables = all((hasattr(var, 'dshape')
-                                 for var in variables))
+                                 for var in input_vars))
         if (values and dshaped_variables and
             not all(var.dshape == getattr(val, 'shape', tuple())
-                    for var, val in zip(variables, values))):
-            output = np.array([func(*v) for v in zip(*values)])
+                    for var, val in zip(input_vars, input_vals))):
+            output = np.array([func(*v) for v in zip(*input_vals)])
         elif (size is not None and any((val.ndim > var.ndim)
-              for var, val in zip(variables, values))):
-            output = np.array([func(*v) for v in zip(*values)])
+              for var, val in zip(input_vars, input_vals))):
+            output = np.array([func(*v) for v in zip(*input_vals)])
         else:
-            output = func(*values)
+            output = func(*input_vals)
         return output
     raise ValueError('Unexpected type in draw_value: %s' % type(param))
 
 
@@ -515,7 +549,11 @@ def to_tuple(shape):
     """Convert ints, arrays, and Nones to tuples"""
     if shape is None:
         return tuple()
-    return tuple(np.atleast_1d(shape))
+    temp = np.atleast_1d(shape)
+    if temp.size == 0:
+        return tuple()
+    else:
+        return tuple(temp)
 
 
 def _is_one_d(dist_shape):
     if hasattr(dist_shape, 'dshape') and dist_shape.dshape in ((), (0,), (1,)):
diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index 817a6343d6..2333a83505 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -5,7 +5,8 @@
 from ..math import logsumexp
 from .dist_math import bound, random_choice
 from .distribution import (Discrete, Distribution, draw_values,
-                           generate_samples, _DrawValuesContext)
+                           generate_samples, _DrawValuesContext,
+                           _DrawValuesContextBlocker, to_tuple)
 from .continuous import get_tau_sigma, Normal
 
@@ -102,6 +103,35 @@ def __init__(self, w, comp_dists, *args, **kwargs):
 
         super().__init__(shape, dtype, defaults=defaults, *args, **kwargs)
 
+    @property
+    def comp_dists(self):
+        return self._comp_dists
+
+    @comp_dists.setter
+    def comp_dists(self, _comp_dists):
+        self._comp_dists = _comp_dists
+        # Test whether comp_dists can call random with a non-None size
+        with _DrawValuesContextBlocker():
+            if isinstance(self.comp_dists, (list, tuple)):
+                try:
+                    [comp_dist.random(size=23)
+                     for comp_dist in self.comp_dists]
+                    self._comp_dists_vect = True
+                except Exception:
+                    # The comp_dists cannot call random with a non-None size
+                    # or without knowledge of the point, so we assume that we
+                    # will have to iterate calls to random to get the right size
+                    self._comp_dists_vect = False
+            else:
+                try:
+                    self.comp_dists.random(size=23)
+                    self._comp_dists_vect = True
+                except Exception:
+                    # The comp_dists cannot call random with a non-None size
+                    # or without knowledge of the point, so we assume that we
+                    # will have to iterate calls to random to get the right size
+                    self._comp_dists_vect = False
+
     def _comp_logp(self, value):
         comp_dists = self.comp_dists
 
@@ -131,13 +161,33 @@ def _comp_modes(self):
                               axis=1))
 
     def _comp_samples(self, point=None, size=None):
-        try:
-            samples = self.comp_dists.random(point=point, size=size)
-        except AttributeError:
-            samples = np.column_stack([comp_dist.random(point=point, size=size)
-                                       for comp_dist in self.comp_dists])
-
-        return np.squeeze(samples)
+        if self._comp_dists_vect or size is None:
+            try:
+                return self.comp_dists.random(point=point, size=size)
+            except AttributeError:
+                samples = np.array([comp_dist.random(point=point, size=size)
+                                    for comp_dist in self.comp_dists])
+                samples = np.moveaxis(samples, 0, samples.ndim - 1)
+        else:
+            # We must iterate the calls to random manually
+            size = to_tuple(size)
+            _size = int(np.prod(size))
+            try:
+                samples = np.array([self.comp_dists.random(point=point,
+                                                           size=None)
+                                    for _ in range(_size)])
+                samples = np.reshape(samples, size + samples.shape[1:])
+            except AttributeError:
+                samples = np.array([[comp_dist.random(point=point, size=None)
+                                     for _ in range(_size)]
+                                    for comp_dist in self.comp_dists])
+                samples = np.moveaxis(samples, 0, samples.ndim - 1)
+                samples = np.reshape(samples, size + samples.shape[1:])
+
+        if samples.shape[-1] == 1:
+            return samples[..., 0]
+        else:
+            return samples
 
     def logp(self, value):
         w = self.w
 
@@ -147,42 +197,99 @@ def logp(self, value):
                      broadcast_conditions=False)
 
     def random(self, point=None, size=None):
+        # Convert size to tuple
+        size = to_tuple(size)
+        # Draw mixture weights and a sample from each mixture to infer shape
         with _DrawValuesContext() as draw_context:
-            w = draw_values([self.w], point=point)[0]
+            # We first need to check w and comp_tmp shapes and recompute size
+            w = draw_values([self.w], point=point, size=size)[0]
+            with _DrawValuesContextBlocker():
+                # We don't want to store the values drawn here in the context
+                # because they won't have the correct size
                 comp_tmp = self._comp_samples(point=point, size=None)
-        if np.asarray(self.shape).size == 0:
-            distshape = np.asarray(np.broadcast(w, comp_tmp).shape)[..., :-1]
+
+        # When size is not None, it's hard to tell the w parameter shape
+        if size is not None and w.shape[:len(size)] == size:
+            w_shape = w.shape[len(size):]
+        else:
+            w_shape = w.shape
+
+        # Try to determine parameter shape and dist_shape
+        param_shape = np.broadcast(np.empty(w_shape),
+                                   comp_tmp).shape
+        if np.asarray(self.shape).size != 0:
+            dist_shape = np.broadcast(np.empty(self.shape),
+                                      np.empty(param_shape[:-1])).shape
+        else:
+            dist_shape = param_shape[:-1]
+
+        # When size is not None, maybe dist_shape partially overlaps with size
+        if size is not None:
+            if size == dist_shape:
+                size = None
+            elif size[-len(dist_shape):] == dist_shape:
+                size = size[:len(size) - len(dist_shape)]
+
+        # We get an integer _size instead of a tuple size for drawing the
+        # mixture, then we just reshape the output
+        if size is None:
+            _size = None
         else:
-            distshape = np.asarray(self.shape)
+            _size = int(np.prod(size))
+
+        # Now we must broadcast w to the shape that considers size, dist_shape
+        # and param_shape. However, we must take care with the cases in which
+        # dist_shape and param_shape overlap
+        if size is not None and w.shape[:len(size)] == size:
+            if w.shape[:len(size + dist_shape)] != (size + dist_shape):
+                # To allow w to broadcast, we insert a new axis between the
+                # "size" axis and the "mixture" axis
+                _w = w[(slice(None),) * len(size) +  # Index the size axis
+                       (np.newaxis,) * len(dist_shape) +  # Add new axis for the dist_shape
+                       (slice(None),)]  # Close with the slice of mixture components
+                w = np.broadcast_to(_w, size + dist_shape + (param_shape[-1],))
+        elif size is not None:
+            w = np.broadcast_to(w, size + dist_shape + (param_shape[-1],))
+        else:
+            w = np.broadcast_to(w, dist_shape + (param_shape[-1],))
-        # Normalize inputs
-        w /= w.sum(axis=-1, keepdims=True)
+        # Compute the total size of the mixture's random call with size
+        if _size is not None:
+            output_size = int(_size * np.prod(dist_shape) * param_shape[-1])
+        else:
+            output_size = int(np.prod(dist_shape) * param_shape[-1])
+        # Get the size we need for the mixture's random call
+        mixture_size = int(output_size // np.prod(comp_tmp.shape))
+        if mixture_size == 1 and _size is None:
+            mixture_size = None
+
+        # Semiflatten the mixture weights. The last axis is the number of
The last axis is the number of + # mixture mixture components, and the rest is all about size, + # dist_shape and broadcasting + w = np.reshape(w, (-1, w.shape[-1])) + # Normalize mixture weights + w = w / w.sum(axis=-1, keepdims=True) w_samples = generate_samples(random_choice, p=w, broadcast_shape=w.shape[:-1] or (1,), - dist_shape=distshape, - size=size).squeeze() - if (size is None) or (distshape.size == 0): - with draw_context: - comp_samples = self._comp_samples(point=point, size=size) - if comp_samples.ndim > 1: - samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples]) - else: - samples = np.squeeze(comp_samples[w_samples]) + dist_shape=w.shape[:-1] or (1,), + size=size) + # Sample from the mixture + with draw_context: + mixed_samples = self._comp_samples(point=point, + size=mixture_size) + w_samples = w_samples.flatten() + # Semiflatten the mixture to be able to zip it with w_samples + mixed_samples = np.reshape(mixed_samples, (-1, comp_tmp.shape[-1])) + # Select the samples from the mixture + samples = np.array([mixed[choice] for choice, mixed in + zip(w_samples, mixed_samples)]) + # Reshape the samples to the correct output shape + if size is None: + samples = np.reshape(samples, dist_shape) else: - if w_samples.ndim == 1: - w_samples = np.reshape(np.tile(w_samples, size), (size,) + w_samples.shape) - samples = np.zeros((size,)+tuple(distshape)) - with draw_context: - for i in range(size): - w_tmp = w_samples[i, :] - comp_tmp = self._comp_samples(point=point, size=None) - if comp_tmp.ndim > 1: - samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp]) - else: - samples[i, :] = np.squeeze(comp_tmp[w_tmp]) - + samples = np.reshape(samples, size + dist_shape) return samples diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 04ae00e56f..7d6d7d4a84 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -226,12 +226,13 @@ def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, def random(self, point=None, size=None): if size is None: - size = [] + size = tuple() else: - try: - size = list(size) - except TypeError: - size = [size] + if not isinstance(size, tuple): + try: + size = tuple(size) + except TypeError: + size = (size,) if self._cov_type == 'cov': mu, cov = draw_values([self.mu, self.cov], point=point, size=size) @@ -242,23 +243,37 @@ def random(self, point=None, size=None): dist = stats.multivariate_normal( mean=mu, cov=cov, allow_singular=True) except ValueError: - size.append(mu.shape[-1]) + size += (mu.shape[-1],) return np.nan * np.zeros(size) return dist.rvs(size) elif self._cov_type == 'chol': - mu, chol = draw_values([self.mu, self.chol_cov], point=point, size=size) - if mu.shape[-1] != chol[0].shape[-1]: + mu, chol = draw_values([self.mu, self.chol_cov], + point=point, size=size) + if size and mu.ndim == len(size) and mu.shape == size: + mu = mu[..., np.newaxis] + if mu.shape[-1] != chol.shape[-1] and mu.shape[-1] != 1: raise ValueError("Shapes for mu and chol don't match") - - size.append(mu.shape[-1]) - standard_normal = np.random.standard_normal(size) - return mu + np.dot(standard_normal, chol.T) + broadcast_shape = ( + np.broadcast(np.empty(mu.shape[:-1]), + np.empty(chol.shape[:-2])).shape + ) + + mu = np.broadcast_to(mu, broadcast_shape + (chol.shape[-1],)) + chol = np.broadcast_to(chol, broadcast_shape + chol.shape[-2:]) + # If mu and chol were fixed by the point, only the standard normal + # should change + if mu.shape[:len(size)] != 
+                std_norm_shape = size + mu.shape
+            else:
+                std_norm_shape = mu.shape
+            standard_normal = np.random.standard_normal(std_norm_shape)
+            return mu + np.tensordot(standard_normal, chol, axes=[[-1], [-1]])
         else:
             mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
             if mu.shape[-1] != tau[0].shape[-1]:
                 raise ValueError("Shapes for mu and tau don't match")
-            size.append(mu.shape[-1])
+            size += (mu.shape[-1],)
             try:
                 chol = linalg.cholesky(tau, lower=True)
             except linalg.LinAlgError:
@@ -1001,7 +1016,7 @@ def __init__(self, eta, n, sd_dist, *args, **kwargs):
         self.n = n
         self.eta = eta
 
-        if 'transform' in kwargs:
+        if 'transform' in kwargs and kwargs['transform'] is not None:
             raise ValueError('Invalid parameter: transform.')
         if 'shape' in kwargs:
             raise ValueError('Invalid parameter: shape.')
@@ -1052,6 +1067,90 @@ def logp(self, x):
 
         return norm + logp_lkj + logp_sd + det_invjac
 
+    def _random(self, n, eta, size=1):
+        eta_sample_shape = (size,) + eta.shape
+        P = np.eye(n) * np.ones(eta_sample_shape + (n, n))
+        # Original implementation in R; see:
+        # https://github.com/rmcelreath/rethinking/blob/master/R/distributions.r
+        beta = eta - 1. + n/2.
+        r12 = 2. * stats.beta.rvs(a=beta, b=beta, size=eta_sample_shape) - 1.
+        P[..., 0, 1] = r12
+        P[..., 1, 1] = np.sqrt(1. - r12**2)
+        for mp1 in range(2, n):
+            beta -= 0.5
+            y = stats.beta.rvs(a=mp1 / 2., b=beta, size=eta_sample_shape)
+            z = stats.norm.rvs(loc=0, scale=1, size=eta_sample_shape + (mp1,))
+            z = z / np.sqrt(np.einsum('ij,ij->j', z, z))
+            P[..., 0:mp1, mp1] = np.sqrt(y[..., np.newaxis]) * z
+            P[..., mp1, mp1] = np.sqrt(1. - y)
+        C = np.einsum('...ji,...jk->...ik', P, P)
+        D = np.atleast_1d(self.sd_dist.random(size=P.shape[:-2]))
+        if D.shape in [tuple(), (1,)]:
+            D = self.sd_dist.random(size=P.shape[:-1])
+        elif D.ndim < C.ndim - 1:
+            D = [D] + [self.sd_dist.random(size=P.shape[:-2])
+                       for _ in range(n - 1)]
+            D = np.moveaxis(np.array(D), 0, C.ndim - 2)
+        elif D.ndim == C.ndim - 1:
+            if D.shape[-1] == 1:
+                D = [D] + [self.sd_dist.random(size=P.shape[:-2])
+                           for _ in range(n - 1)]
+                D = np.concatenate(D, axis=-1)
+            elif D.shape[-1] != n:
+                raise ValueError('The samples drawn from the '
+                                 'supplied sd_dist.random have the wrong '
+                                 'size. Expected {} but got {} instead.'.
+                                 format(n, D.shape[-1]))
+        else:
+            raise ValueError('Supplied sd_dist.random generates samples with '
+                             'too many dimensions. It must yield samples '
+                             'with 0 or 1 dimensions. Got {} instead'.
+                             format(D.ndim - C.ndim + 2))
+        C *= D[..., :, np.newaxis] * D[..., np.newaxis, :]
+        tril_idx = np.tril_indices(n, k=0)
+        return np.linalg.cholesky(C)[..., tril_idx[0], tril_idx[1]]
+
+    def random(self, point=None, size=None):
+        # Get parameters and broadcast them
+        n, eta = draw_values([self.n, self.eta], point=point, size=size)
+        broadcast_shape = np.broadcast(n, eta).shape
+        # We can only handle cov matrices with a constant n per random call
+        n = np.unique(n)
+        if len(n) > 1:
+            raise RuntimeError('Varying n is not supported for LKJCholeskyCov')
+        n = int(n[0])
+        dist_shape = ((n * (n + 1)) // 2,)
+        # We make sure that eta and the drawn n get their shapes broadcast
+        eta = np.broadcast_to(eta, broadcast_shape)
+        # We change the size of the draw depending on the broadcast shape
+        sample_shape = broadcast_shape + dist_shape
+        if size is not None:
+            if not isinstance(size, tuple):
+                try:
+                    size = tuple(size)
+                except TypeError:
+                    size = (size,)
+            if size == sample_shape:
+                size = None
+            elif size == broadcast_shape:
+                size = None
+            elif size[-len(sample_shape):] == sample_shape:
+                size = size[:len(size) - len(sample_shape)]
+            elif size[-len(broadcast_shape):] == broadcast_shape:
+                size = size[:len(size) - len(broadcast_shape)]
+        # We will always provide _random with an integer size and then reshape
+        # the output to get the correct size
+        if size is not None:
+            _size = np.prod(size)
+        else:
+            _size = 1
+        samples = self._random(n, eta, size=_size)
+        if size is None:
+            samples = samples[0]
+        else:
+            samples = np.reshape(samples, size + sample_shape)
+        return samples
+
 
 class LKJCorr(Continuous):
     R"""
diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py
index 43fd695dd7..81ba1c055a 100644
--- a/pymc3/distributions/transforms.py
+++ b/pymc3/distributions/transforms.py
@@ -446,7 +446,7 @@ def forward(self, y):
         return tt.advanced_set_subtensor1(y, tt.log(y[self.diag_idxs]), self.diag_idxs)
 
     def forward_val(self, y, point=None):
-        y[self.diag_idxs] = np.log(y[self.diag_idxs])
+        y[..., self.diag_idxs] = np.log(y[..., self.diag_idxs])
         return y
 
     def jacobian_det(self, y):
diff --git a/pymc3/model.py b/pymc3/model.py
index 67ab20f7b9..4e0f7ec10a 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -121,6 +121,8 @@ def _get_named_nodes_and_relations(graph, parent, leaf_nodes,
                 except KeyError:
                     node_parents[graph] = {parent}
                 node_children[parent].add(graph)
+            else:
+                node_parents[graph] = set()
            # Flag that the leaf node has no children
            node_children[graph] = set()
        else:  # Intermediate node
@@ -131,6 +133,8 @@ def _get_named_nodes_and_relations(graph, parent, leaf_nodes,
                except KeyError:
                    node_parents[graph] = {parent}
                node_children[parent].add(graph)
+            else:
+                node_parents[graph] = set()
            # The current node will be set as the parent of the next
            # nodes only if it is a named node
            parent = graph
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index b5c0593806..0a2a380ce6 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -8,7 +8,9 @@
 import theano
 
 import pymc3 as pm
-from pymc3.distributions.distribution import draw_values
+from pymc3.distributions.distribution import (draw_values,
+                                              _DrawValuesContext,
+                                              _DrawValuesContextBlocker)
 from .helpers import SeededTest
 from .test_distributions import (
     build_model, Domain, product, R, Rplus, Rplusbig, Runif, Rplusdunif,
@@ -114,6 +116,44 @@ def test_random_sample_returns_nd_array(self):
         assert isinstance(tau, np.ndarray)
 
 
+class TestDrawValuesContext:
+    def test_normal_context(self):
+        with _DrawValuesContext() as context0:
+            assert context0.parent is None
+            context0.drawn_vars['root_test'] = 1
+            with _DrawValuesContext() as context1:
+                assert id(context1.drawn_vars) == id(context0.drawn_vars)
+                assert context1.parent == context0
+                with _DrawValuesContext() as context2:
+                    assert id(context2.drawn_vars) == id(context0.drawn_vars)
+                    assert context2.parent == context1
+                    context2.drawn_vars['leaf_test'] = 2
+                assert context1.drawn_vars['leaf_test'] == 2
+                context1.drawn_vars['root_test'] = 3
+            assert context0.drawn_vars['root_test'] == 3
+            assert context0.drawn_vars['leaf_test'] == 2
+
+    def test_blocking_context(self):
+        with _DrawValuesContext() as context0:
+            assert context0.parent is None
+            context0.drawn_vars['root_test'] = 1
+            with _DrawValuesContext() as context1:
+                assert id(context1.drawn_vars) == id(context0.drawn_vars)
+                assert context1.parent == context0
+                with _DrawValuesContextBlocker() as blocker:
+                    assert id(blocker.drawn_vars) != id(context0.drawn_vars)
+                    assert blocker.parent is None
+                    blocker.drawn_vars['root_test'] = 2
+                    with _DrawValuesContext() as context2:
+                        assert id(context2.drawn_vars) == id(blocker.drawn_vars)
+                        assert context2.parent == blocker
+                        context2.drawn_vars['root_test'] = 3
+                        context2.drawn_vars['leaf_test'] = 4
+                    assert blocker.drawn_vars['root_test'] == 3
+                assert 'leaf_test' not in context1.drawn_vars
+            assert context0.drawn_vars['root_test'] == 1
+
+
 class BaseTestCases:
     class BaseTestCase(SeededTest):
         shape = 5
diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py
index 54be0e6179..de6a95fc44 100644
--- a/pymc3/tests/test_mixture.py
+++ b/pymc3/tests/test_mixture.py
@@ -2,6 +2,7 @@
 from numpy.testing import assert_allclose
 
 from .helpers import SeededTest
+import pymc3 as pm
 from pymc3 import Dirichlet, Gamma, Normal, Lognormal, Poisson, Exponential, \
     Mixture, NormalMixture, MvNormal, sample, Metropolis, Model
 import scipy.stats as st
@@ -231,3 +232,55 @@ def mixmixlogp(value, point):
         assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))
         assert_allclose(priorlogp + mixmixlogpg.sum(),
                         model.logp(test_point))
+
+    def test_sample_prior_and_posterior(self):
+        def build_toy_dataset(N, K):
+            pi = np.array([0.2, 0.5, 0.3])
+            mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
+            stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
+            x = np.zeros((N, 3), dtype=np.float32)
+            y = np.zeros((N,), dtype=np.int)
+            for n in range(N):
+                k = np.argmax(np.random.multinomial(1, pi))
+                x[n, :] = np.random.multivariate_normal(mus[k],
+                                                        np.diag(stds[k]))
+                y[n] = k
+            return x, y
+
+        N = 100  # number of data points
+        K = 3  # number of mixture components
+        D = 3  # dimensionality of the data
+
+        X, y = build_toy_dataset(N, K)
+
+        with pm.Model() as model:
+            pi = pm.Dirichlet('pi', np.ones(K))
+
+            comp_dist = []
+            mu = []
+            packed_chol = []
+            chol = []
+            for i in range(K):
+                mu.append(pm.Normal('mu%i' % i, 0, 10, shape=D))
+                packed_chol.append(
+                    pm.LKJCholeskyCov('chol_cov_%i' % i,
+                                      eta=2,
+                                      n=D,
+                                      sd_dist=pm.HalfNormal.dist(2.5))
+                )
+                chol.append(pm.expand_packed_triangular(D, packed_chol[i],
+                                                        lower=True))
+                comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i]))
+
+            pm.Mixture('x_obs', pi, comp_dist, observed=X)
+        with model:
+            trace = pm.sample(30, tune=10, chains=1)
+
+        n_samples = 20
+        with model:
+            ppc = pm.sample_posterior_predictive(trace, n_samples)
+            prior = pm.sample_prior_predictive(samples=n_samples)
+        assert ppc['x_obs'].shape == (n_samples,) + X.shape
+        assert prior['x_obs'].shape == (n_samples,) + X.shape
+        assert prior['mu0'].shape == (n_samples, D)
+        assert prior['chol_cov_0'].shape == (n_samples, D * (D + 1) // 2)
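
A minimal usage sketch (not part of the patch) of the cache-isolation behaviour that the changes above introduce and that TestDrawValuesContext.test_blocking_context exercises. It assumes the patched pymc3.distributions.distribution module; the ('some_rv', None) key is purely illustrative of the new (RV, size) keying.

    from pymc3.distributions.distribution import (_DrawValuesContext,
                                                   _DrawValuesContextBlocker)

    with _DrawValuesContext() as outer:
        # After this patch, drawn_vars is keyed by (RV, size) tuples
        outer.drawn_vars[('some_rv', None)] = 1.0
        with _DrawValuesContextBlocker() as blocker:
            # The blocker has no parent and starts with an empty cache, so
            # anything stored here never leaks back into `outer`
            assert blocker.parent is None
            assert ('some_rv', None) not in blocker.drawn_vars
            blocker.drawn_vars[('some_rv', None)] = 2.0
        assert outer.drawn_vars[('some_rv', None)] == 1.0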