Fix for #3270 (#3293)

Merged: 8 commits, Jan 3, 2019
82 changes: 60 additions & 22 deletions pymc3/distributions/distribution.py
@@ -221,7 +221,7 @@ def __new__(cls, *args, **kwargs):
potential_parent = cls.get_contexts()[-1]
# We have to make sure that the context is a _DrawValuesContext
# and not a Model
if isinstance(potential_parent, cls):
if isinstance(potential_parent, _DrawValuesContext):
instance._parent = potential_parent
else:
instance._parent = None
@@ -235,7 +235,8 @@ def __init__(self):
# another _DrawValuesContext will share the reference to the
# drawn_vars dictionary. This means that separate branches
# in the nested _DrawValuesContext context tree will see the
# same drawn values
# same drawn values.
# The drawn_vars keys shall be (RV, size) tuples
self.drawn_vars = self.parent.drawn_vars
else:
self.drawn_vars = dict()
@@ -245,6 +246,22 @@ def parent(self):
return self._parent


class _DrawValuesContextBlocker(_DrawValuesContext, metaclass=InitContextMeta):
"""
Context manager that starts a new drawn variables context disregarding all
parent contexts. This can be used inside a random method to ensure that
the drawn values won't be the ones cached by previous calls.
"""
def __new__(cls, *args, **kwargs):
# resolves the parent instance
instance = super(_DrawValuesContextBlocker, cls).__new__(cls)
instance._parent = None
return instance

def __init__(self):
self.drawn_vars = dict()
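
The _DrawValuesContextBlocker above deliberately ignores any enclosing context. A standalone sketch of the intended behaviour (ToyContext is a hypothetical stand-in, not the pymc3 classes): a nested context shares its parent's drawn_vars cache, while a blocker always starts from an empty one.

# Standalone sketch of the caching behaviour described above; ToyContext is
# hypothetical, not pymc3's _DrawValuesContext.
class ToyContext:
    _stack = []

    def __init__(self, block=False):
        parent = ToyContext._stack[-1] if ToyContext._stack and not block else None
        # Nested contexts share the parent's cache; a "blocker" gets a fresh one
        self.drawn_vars = parent.drawn_vars if parent is not None else {}

    def __enter__(self):
        ToyContext._stack.append(self)
        return self

    def __exit__(self, *exc):
        ToyContext._stack.pop()


with ToyContext() as outer:
    outer.drawn_vars[("mu", None)] = 0.5
    with ToyContext() as nested:
        assert ("mu", None) in nested.drawn_vars       # shared with the parent
    with ToyContext(block=True) as blocked:
        assert ("mu", None) not in blocked.drawn_vars  # fresh, empty cache
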


def is_fast_drawable(var):
return isinstance(var, (numbers.Number,
np.ndarray,
@@ -288,14 +305,14 @@ def draw_values(params, point=None, size=None):
continue

name = getattr(p, 'name', None)
if p in drawn:
if (p, size) in drawn:
# param was drawn in related contexts
v = drawn[p]
v = drawn[(p, size)]
evaluated[i] = v
elif name is not None and name in point:
# param.name is in point
v = point[name]
evaluated[i] = drawn[p] = v
evaluated[i] = drawn[(p, size)] = v
else:
# param still needs to be drawn
symbolic_params.append((i, p))
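
The hunk above switches the drawn_vars cache from being keyed by the variable alone to being keyed by (variable, size). A minimal numpy sketch of why that matters (the draw helper is hypothetical, not pymc3 code): the same variable drawn with two different sizes must not collide in the cache.

import numpy as np

def draw(cache, name, size):
    # Keying on (variable, size) keeps draws of different sizes from
    # shadowing one another in the shared cache.
    key = (name, size)
    if key not in cache:
        cache[key] = np.random.standard_normal(size if size is not None else ())
    return cache[key]

drawn = {}
a = draw(drawn, "mu", 5)     # shape (5,)
b = draw(drawn, "mu", None)  # scalar; a key of just "mu" would have returned `a`
assert a.shape == (5,) and np.ndim(b) == 0
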
@@ -330,12 +347,12 @@ def draw_values(params, point=None, size=None):
named_nodes_children[k].update(nnc[k])

# Init givens and the stack of nodes to try to `_draw_value` from
givens = {p.name: (p, v) for p, v in drawn.items()
givens = {p.name: (p, v) for (p, size), v in drawn.items()
if getattr(p, 'name', None) is not None}
stack = list(leaf_nodes.values()) # A queue would be more appropriate
while stack:
next_ = stack.pop(0)
if next_ in drawn:
if (next_, size) in drawn:
# If the node already has a givens value, skip it
continue
elif isinstance(next_, (tt.TensorConstant,
@@ -364,14 +381,14 @@ def draw_values(params, point=None, size=None):
givens=temp_givens,
size=size)
givens[next_.name] = (next_, value)
drawn[next_] = value
drawn[(next_, size)] = value
except theano.gof.fg.MissingInputError:
# The node failed, so we must add the node's parents to
# the stack of nodes to try to draw from. We exclude the
# nodes in the `params` list.
stack.extend([node for node in named_nodes_parents[next_]
if node is not None and
node.name not in drawn and
(node, size) not in drawn and
node not in params])

# the below makes sure the graph is evaluated in order
@@ -386,15 +403,15 @@ def draw_values(params, point=None, size=None):
missing_inputs = set()
for param_idx in to_eval:
param = params[param_idx]
if param in drawn:
evaluated[param_idx] = drawn[param]
if (param, size) in drawn:
evaluated[param_idx] = drawn[(param, size)]
else:
try: # might evaluate in a bad order,
value = _draw_value(param,
point=point,
givens=givens.values(),
size=size)
evaluated[param_idx] = drawn[param] = value
evaluated[param_idx] = drawn[(param, size)] = value
givens[param.name] = (param, value)
except theano.gof.fg.MissingInputError:
missing_inputs.add(param_idx)
@@ -475,8 +492,11 @@ def _draw_value(param, point=None, givens=None, size=None):
# reset shape to account for shape changes
# with theano.shared inputs
dist_tmp.shape = np.array([])
val = np.atleast_1d(dist_tmp.random(point=point,
size=None))
# We want to draw values to infer the dist_shape, but we don't
# want to store these drawn values in the context
with _DrawValuesContextBlocker():
val = np.atleast_1d(dist_tmp.random(point=point,
size=None))
# Sometimes point may change the size of val but not the
# distribution's shape
if point and size is not None:
@@ -493,20 +513,34 @@ def _draw_value(param, point=None, givens=None, size=None):
variables, values = list(zip(*givens))
else:
variables = values = []
func = _compile_theano_function(param, variables)
# We only truly care whether the ancestors of param that were given
# values have matching dshape and val.shape
param_ancestors = \
set(theano.gof.graph.ancestors([param],
blockers=list(variables))
)
inputs = [(var, val) for var, val in
zip(variables, values)
if var in param_ancestors]
if inputs:
input_vars, input_vals = list(zip(*inputs))
else:
input_vars = []
input_vals = []
func = _compile_theano_function(param, input_vars)
if size is not None:
size = np.atleast_1d(size)
dshaped_variables = all((hasattr(var, 'dshape')
for var in variables))
for var in input_vars))
if (values and dshaped_variables and
not all(var.dshape == getattr(val, 'shape', tuple())
for var, val in zip(variables, values))):
output = np.array([func(*v) for v in zip(*values)])
for var, val in zip(input_vars, input_vals))):
output = np.array([func(*v) for v in zip(*input_vals)])
elif (size is not None and any((val.ndim > var.ndim)
for var, val in zip(variables, values))):
output = np.array([func(*v) for v in zip(*values)])
for var, val in zip(input_vars, input_vals))):
output = np.array([func(*v) for v in zip(*input_vals)])
else:
output = func(*values)
output = func(*input_vals)
return output
raise ValueError('Unexpected type in draw_value: %s' % type(param))
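
The hunk above narrows the givens passed to _compile_theano_function down to the variables that are actually ancestors of param. A small Theano sketch of that filtering step, assuming a working Theano install (an illustration only, not the pymc3 code path):

import theano
import theano.tensor as tt

x = tt.scalar('x')
y = tt.scalar('y')
param = x + 1.0  # y plays no role in this expression

givens = {'x': (x, 2.0), 'y': (y, 3.0)}
# Keep only the given variables that are ancestors of `param`
param_ancestors = set(theano.gof.graph.ancestors([param], blockers=[x, y]))
inputs = [(var, val) for var, val in givens.values() if var in param_ancestors]
input_vars = [var for var, _ in inputs]
input_vals = [val for _, val in inputs]

func = theano.function(input_vars, param)
assert input_vars == [x]          # y was filtered out
assert func(*input_vals) == 3.0
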

@@ -515,7 +549,11 @@ def to_tuple(shape):
"""Convert ints, arrays, and Nones to tuples"""
if shape is None:
return tuple()
return tuple(np.atleast_1d(shape))
temp = np.atleast_1d(shape)
if temp.size == 0:
return tuple()
else:
return tuple(temp)
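
A few example calls against a copy of the patched helper make the new empty-shape handling concrete:

import numpy as np

def to_tuple(shape):
    """Convert ints, arrays, and Nones to tuples (copy of the patched helper)."""
    if shape is None:
        return tuple()
    temp = np.atleast_1d(shape)
    if temp.size == 0:
        return tuple()
    else:
        return tuple(temp)

assert to_tuple(None) == ()
assert to_tuple(3) == (3,)
assert to_tuple((2, 4)) == (2, 4)
assert to_tuple([]) == ()   # empty shapes now collapse to (), the new branch
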

def _is_one_d(dist_shape):
if hasattr(dist_shape, 'dshape') and dist_shape.dshape in ((), (0,), (1,)):
177 changes: 142 additions & 35 deletions pymc3/distributions/mixture.py
@@ -5,7 +5,8 @@
from ..math import logsumexp
from .dist_math import bound, random_choice
from .distribution import (Discrete, Distribution, draw_values,
generate_samples, _DrawValuesContext)
generate_samples, _DrawValuesContext,
_DrawValuesContextBlocker, to_tuple)
from .continuous import get_tau_sigma, Normal


@@ -102,6 +103,35 @@ def __init__(self, w, comp_dists, *args, **kwargs):

super().__init__(shape, dtype, defaults=defaults, *args, **kwargs)

@property
def comp_dists(self):
return self._comp_dists

@comp_dists.setter
def comp_dists(self, _comp_dists):
self._comp_dists = _comp_dists
# Test whether the comp_dists can call random with a non-None size
with _DrawValuesContextBlocker():
if isinstance(self.comp_dists, (list, tuple)):
try:
[comp_dist.random(size=23)
for comp_dist in self.comp_dists]
self._comp_dists_vect = True
except Exception:
# The comp_dists cannot call random with a non-None size or
# without knowledge of the point, so we assume that we will
# have to iterate calls to random to get the correct size
self._comp_dists_vect = False
else:
try:
self.comp_dists.random(size=23)
self._comp_dists_vect = True
except Exception:
# The comp_dists cannot call random with a non-None size or
# without knowledge of the point, so we assume that we will
# have to iterate calls to random to get the correct size
self._comp_dists_vect = False
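
The setter above probes whether every component can draw with an explicit size and records the result in _comp_dists_vect. A standalone sketch of that probe (FakeComponent is a hypothetical stand-in for a component distribution):

import numpy as np

class FakeComponent:
    """Hypothetical component; only vectorised ones accept an explicit size."""
    def __init__(self, vectorised):
        self.vectorised = vectorised

    def random(self, point=None, size=None):
        if size is not None and not self.vectorised:
            raise ValueError("cannot draw with an explicit size")
        return np.zeros(size if size is not None else ())

def supports_vectorised_random(comp_dist):
    # Same cheap probe the comp_dists setter uses: any exception means
    # we will have to iterate single draws later.
    try:
        comp_dist.random(size=23)
        return True
    except Exception:
        return False

assert supports_vectorised_random(FakeComponent(vectorised=True))
assert not supports_vectorised_random(FakeComponent(vectorised=False))
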

def _comp_logp(self, value):
comp_dists = self.comp_dists

@@ -131,13 +161,33 @@ def _comp_modes(self):
axis=1))

def _comp_samples(self, point=None, size=None):
try:
samples = self.comp_dists.random(point=point, size=size)
except AttributeError:
samples = np.column_stack([comp_dist.random(point=point, size=size)
for comp_dist in self.comp_dists])

return np.squeeze(samples)
if self._comp_dists_vect or size is None:
try:
return self.comp_dists.random(point=point, size=size)
except AttributeError:
samples = np.array([comp_dist.random(point=point, size=size)
for comp_dist in self.comp_dists])
samples = np.moveaxis(samples, 0, samples.ndim - 1)
else:
# We must iterate the calls to random manually
size = to_tuple(size)
_size = int(np.prod(size))
try:
samples = np.array([self.comp_dists.random(point=point,
size=None)
for _ in range(_size)])
samples = np.reshape(samples, size + samples.shape[1:])
except AttributeError:
samples = np.array([[comp_dist.random(point=point, size=None)
for _ in range(_size)]
for comp_dist in self.comp_dists])
samples = np.moveaxis(samples, 0, samples.ndim - 1)
samples = np.reshape(samples, size + samples.shape[1:])

if samples.shape[-1] == 1:
return samples[..., 0]
else:
return samples
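
To make the shape bookkeeping in the fallback branch above concrete, here is a toy numpy reproduction of the iterate-and-stack path for three scalar components (draw_scalar stands in for comp_dist.random(point=point, size=None)):

import numpy as np

rng = np.random.RandomState(0)

def draw_scalar(loc):
    # Stand-in for a single comp_dist.random(point=point, size=None) call
    return rng.normal(loc)

size = (2, 4)
_size = int(np.prod(size))
component_means = [-1.0, 0.0, 1.0]

samples = np.array([[draw_scalar(loc) for _ in range(_size)]
                    for loc in component_means])      # (n_comp, _size)
samples = np.moveaxis(samples, 0, samples.ndim - 1)   # (_size, n_comp)
samples = np.reshape(samples, size + samples.shape[1:])
assert samples.shape == (2, 4, 3)                     # size + (n_comp,)
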

def logp(self, value):
w = self.w
@@ -147,42 +197,99 @@ def logp(self, value):
broadcast_conditions=False)

def random(self, point=None, size=None):
# Convert size to tuple
size = to_tuple(size)
# Draw mixture weights and a sample from each mixture to infer shape
with _DrawValuesContext() as draw_context:
w = draw_values([self.w], point=point)[0]
# We first need to check the w and comp_tmp shapes and recompute size
w = draw_values([self.w], point=point, size=size)[0]
with _DrawValuesContextBlocker():
# We don't want to store the values drawn here in the context
# because they won't have the correct size
comp_tmp = self._comp_samples(point=point, size=None)
if np.asarray(self.shape).size == 0:
distshape = np.asarray(np.broadcast(w, comp_tmp).shape)[..., :-1]

# When size is not None, it's hard to tell the w parameter shape
if size is not None and w.shape[:len(size)] == size:
w_shape = w.shape[len(size):]
else:
w_shape = w.shape

# Try to determine parameter shape and dist_shape
param_shape = np.broadcast(np.empty(w_shape),
comp_tmp).shape
if np.asarray(self.shape).size != 0:
dist_shape = np.broadcast(np.empty(self.shape),
np.empty(param_shape[:-1])).shape
else:
dist_shape = param_shape[:-1]

# When size is not None, maybe dist_shape partially overlaps with size
if size is not None:
if size == dist_shape:
size = None
elif size[-len(dist_shape):] == dist_shape:
size = size[:len(size) - len(dist_shape)]

# We get an integer _size instead of a tuple size for drawing the
# mixture, then we just reshape the output
if size is None:
_size = None
else:
distshape = np.asarray(self.shape)
_size = int(np.prod(size))

# Now we must broadcast w to the shape that considers size, dist_shape
# and param_shape. However, we must take care with the cases in which
# dist_shape and param_shape overlap
if size is not None and w.shape[:len(size)] == size:
if w.shape[:len(size + dist_shape)] != (size + dist_shape):
# To allow w to broadcast, we insert new axis in between the
# "size" axis and the "mixture" axis
_w = w[(slice(None),) * len(size) + # Index the size axis
(np.newaxis,) * len(dist_shape) + # Add new axis for the dist_shape
(slice(None),)] # Close with the slice of mixture components
w = np.broadcast_to(_w, size + dist_shape + (param_shape[-1],))
elif size is not None:
w = np.broadcast_to(w, size + dist_shape + (param_shape[-1],))
else:
w = np.broadcast_to(w, dist_shape + (param_shape[-1],))

# Normalize inputs
w /= w.sum(axis=-1, keepdims=True)
# Compute the total size of the mixture's random call with size
if _size is not None:
output_size = int(_size * np.prod(dist_shape) * param_shape[-1])
else:
output_size = int(np.prod(dist_shape) * param_shape[-1])
# Get the size we need for the mixture's random call
mixture_size = int(output_size // np.prod(comp_tmp.shape))
if mixture_size == 1 and _size is None:
mixture_size = None

# Semiflatten the mixture weights. The last axis is the number of
# mixture components, and the rest is all about size,
# dist_shape and broadcasting
w = np.reshape(w, (-1, w.shape[-1]))
# Normalize mixture weights
w = w / w.sum(axis=-1, keepdims=True)

w_samples = generate_samples(random_choice,
p=w,
broadcast_shape=w.shape[:-1] or (1,),
dist_shape=distshape,
size=size).squeeze()
if (size is None) or (distshape.size == 0):
with draw_context:
comp_samples = self._comp_samples(point=point, size=size)
if comp_samples.ndim > 1:
samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples])
else:
samples = np.squeeze(comp_samples[w_samples])
dist_shape=w.shape[:-1] or (1,),
size=size)
# Sample from the mixture
with draw_context:
mixed_samples = self._comp_samples(point=point,
size=mixture_size)
w_samples = w_samples.flatten()
# Semiflatten the mixture to be able to zip it with w_samples
mixed_samples = np.reshape(mixed_samples, (-1, comp_tmp.shape[-1]))
# Select the samples from the mixture
samples = np.array([mixed[choice] for choice, mixed in
zip(w_samples, mixed_samples)])
# Reshape the samples to the correct output shape
if size is None:
samples = np.reshape(samples, dist_shape)
else:
if w_samples.ndim == 1:
w_samples = np.reshape(np.tile(w_samples, size), (size,) + w_samples.shape)
samples = np.zeros((size,)+tuple(distshape))
with draw_context:
for i in range(size):
w_tmp = w_samples[i, :]
comp_tmp = self._comp_samples(point=point, size=None)
if comp_tmp.ndim > 1:
samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp])
else:
samples[i, :] = np.squeeze(comp_tmp[w_tmp])

samples = np.reshape(samples, size + dist_shape)
return samples
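
Rough end-to-end usage that this fix targets, sketched against the pymc3 3.6-era API (sd parameter, sample_prior_predictive); this is not the PR's own test case. Prior predictive draws from a Mixture should come back with the requested number of samples:

import numpy as np
import pymc3 as pm

# pymc3 ~3.6 API assumed; parameter names may differ in later releases
with pm.Model():
    w = pm.Dirichlet('w', a=np.ones(2))
    comps = pm.Normal.dist(mu=np.array([-5., 5.]), sd=1., shape=(2,))
    mix = pm.Mixture('mix', w=w, comp_dists=comps)
    prior = pm.sample_prior_predictive(samples=100)

print(prior['mix'].shape)  # expected: (100,)
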

