Fix for #3270 (#3293)

Merged: 8 commits, Jan 3, 2019
82 changes: 60 additions & 22 deletions pymc3/distributions/distribution.py
@@ -221,7 +221,7 @@ def __new__(cls, *args, **kwargs):
potential_parent = cls.get_contexts()[-1]
# We have to make sure that the context is a _DrawValuesContext
# and not a Model
if isinstance(potential_parent, cls):
if isinstance(potential_parent, _DrawValuesContext):
instance._parent = potential_parent
else:
instance._parent = None
@@ -235,7 +235,8 @@ def __init__(self):
# another _DrawValuesContext will share the reference to the
# drawn_vars dictionary. This means that separate branches
# in the nested _DrawValuesContext context tree will see the
# same drawn values
# same drawn values.
# The drawn_vars keys shall be (RV, size) tuples
self.drawn_vars = self.parent.drawn_vars
else:
self.drawn_vars = dict()
@@ -245,6 +246,22 @@ def parent(self):
return self._parent


class _DrawValuesContextBlocker(_DrawValuesContext, metaclass=InitContextMeta):
"""
Context manager that starts a new drawn variables context disregarding all
parent contexts. This can be used inside a random method to ensure that
the drawn values won't be the ones cached by previous calls.
"""
def __new__(cls, *args, **kwargs):
# resolves the parent instance
instance = super(_DrawValuesContextBlocker, cls).__new__(cls)
instance._parent = None
return instance

def __init__(self):
self.drawn_vars = dict()
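
The _DrawValuesContextBlocker above deliberately ignores any enclosing context. A standalone sketch of the intended behaviour (ToyContext is a hypothetical stand-in, not the pymc3 classes): a nested context shares its parent's drawn_vars cache, while a blocker always starts from an empty one.

# Standalone sketch of the caching behaviour described above; ToyContext is
# hypothetical, not pymc3's _DrawValuesContext.
class ToyContext:
    _stack = []

    def __init__(self, block=False):
        parent = ToyContext._stack[-1] if ToyContext._stack and not block else None
        # Nested contexts share the parent's cache; a "blocker" gets a fresh one
        self.drawn_vars = parent.drawn_vars if parent is not None else {}

    def __enter__(self):
        ToyContext._stack.append(self)
        return self

    def __exit__(self, *exc):
        ToyContext._stack.pop()


with ToyContext() as outer:
    outer.drawn_vars[("mu", None)] = 0.5
    with ToyContext() as nested:
        assert ("mu", None) in nested.drawn_vars       # shared with the parent
    with ToyContext(block=True) as blocked:
        assert ("mu", None) not in blocked.drawn_vars  # fresh, empty cache
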


def is_fast_drawable(var):
return isinstance(var, (numbers.Number,
np.ndarray,
@@ -288,14 +305,14 @@ def draw_values(params, point=None, size=None):
continue

name = getattr(p, 'name', None)
if p in drawn:
if (p, size) in drawn:
# param was drawn in related contexts
v = drawn[p]
v = drawn[(p, size)]
evaluated[i] = v
elif name is not None and name in point:
# param.name is in point
v = point[name]
evaluated[i] = drawn[p] = v
evaluated[i] = drawn[(p, size)] = v
else:
# param still needs to be drawn
symbolic_params.append((i, p))
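
The hunk above switches the drawn_vars cache from being keyed by the variable alone to being keyed by (variable, size). A minimal numpy sketch of why that matters (the draw helper is hypothetical, not pymc3 code): the same variable drawn with two different sizes must not collide in the cache.

import numpy as np

def draw(cache, name, size):
    # Keying on (variable, size) keeps draws of different sizes from
    # shadowing one another in the shared cache.
    key = (name, size)
    if key not in cache:
        cache[key] = np.random.standard_normal(size if size is not None else ())
    return cache[key]

drawn = {}
a = draw(drawn, "mu", 5)     # shape (5,)
b = draw(drawn, "mu", None)  # scalar; a key of just "mu" would have returned `a`
assert a.shape == (5,) and np.ndim(b) == 0
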
@@ -330,12 +347,12 @@ def draw_values(params, point=None, size=None):
named_nodes_children[k].update(nnc[k])

# Init givens and the stack of nodes to try to `_draw_value` from
givens = {p.name: (p, v) for p, v in drawn.items()
givens = {p.name: (p, v) for (p, size), v in drawn.items()
if getattr(p, 'name', None) is not None}
stack = list(leaf_nodes.values()) # A queue would be more appropriate
while stack:
next_ = stack.pop(0)
if next_ in drawn:
if (next_, size) in drawn:
# If the node already has a givens value, skip it
continue
elif isinstance(next_, (tt.TensorConstant,
@@ -364,14 +381,14 @@ def draw_values(params, point=None, size=None):
givens=temp_givens,
size=size)
givens[next_.name] = (next_, value)
drawn[next_] = value
drawn[(next_, size)] = value
except theano.gof.fg.MissingInputError:
# The node failed, so we must add the node's parents to
# the stack of nodes to try to draw from. We exclude the
# nodes in the `params` list.
stack.extend([node for node in named_nodes_parents[next_]
if node is not None and
node.name not in drawn and
(node, size) not in drawn and
node not in params])

# the below makes sure the graph is evaluated in order
@@ -386,15 +403,15 @@ def draw_values(params, point=None, size=None):
missing_inputs = set()
for param_idx in to_eval:
param = params[param_idx]
if param in drawn:
evaluated[param_idx] = drawn[param]
if (param, size) in drawn:
evaluated[param_idx] = drawn[(param, size)]
else:
try: # might evaluate in a bad order,
value = _draw_value(param,
point=point,
givens=givens.values(),
size=size)
evaluated[param_idx] = drawn[param] = value
evaluated[param_idx] = drawn[(param, size)] = value
givens[param.name] = (param, value)
except theano.gof.fg.MissingInputError:
missing_inputs.add(param_idx)
@@ -475,8 +492,11 @@ def _draw_value(param, point=None, givens=None, size=None):
# reset shape to account for shape changes
# with theano.shared inputs
dist_tmp.shape = np.array([])
val = np.atleast_1d(dist_tmp.random(point=point,
size=None))
# We want to draw values to infer the dist_shape, but we don't
# want to store these drawn values in the context
with _DrawValuesContextBlocker():
val = np.atleast_1d(dist_tmp.random(point=point,
size=None))
# Sometimes point may change the size of val but not the
# distribution's shape
if point and size is not None:
@@ -493,20 +513,34 @@ def _draw_value(param, point=None, givens=None, size=None):
variables, values = list(zip(*givens))
else:
variables = values = []
func = _compile_theano_function(param, variables)
# We only truly care whether the ancestors of param that were given
# values have matching dshape and val.shape
param_ancestors = \
set(theano.gof.graph.ancestors([param],
blockers=list(variables))
)
inputs = [(var, val) for var, val in
zip(variables, values)
if var in param_ancestors]
if inputs:
input_vars, input_vals = list(zip(*inputs))
else:
input_vars = []
input_vals = []
func = _compile_theano_function(param, input_vars)
if size is not None:
size = np.atleast_1d(size)
dshaped_variables = all((hasattr(var, 'dshape')
for var in variables))
for var in input_vars))
if (values and dshaped_variables and
not all(var.dshape == getattr(val, 'shape', tuple())
for var, val in zip(variables, values))):
output = np.array([func(*v) for v in zip(*values)])
for var, val in zip(input_vars, input_vals))):
output = np.array([func(*v) for v in zip(*input_vals)])
elif (size is not None and any((val.ndim > var.ndim)
for var, val in zip(variables, values))):
output = np.array([func(*v) for v in zip(*values)])
for var, val in zip(input_vars, input_vals))):
output = np.array([func(*v) for v in zip(*input_vals)])
else:
output = func(*values)
output = func(*input_vals)
return output
raise ValueError('Unexpected type in draw_value: %s' % type(param))
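
The hunk above narrows the givens passed to _compile_theano_function down to the variables that are actually ancestors of param. A small Theano sketch of that filtering step, assuming a working Theano install (an illustration only, not the pymc3 code path):

import theano
import theano.tensor as tt

x = tt.scalar('x')
y = tt.scalar('y')
param = x + 1.0  # y plays no role in this expression

givens = {'x': (x, 2.0), 'y': (y, 3.0)}
# Keep only the given variables that are ancestors of `param`
param_ancestors = set(theano.gof.graph.ancestors([param], blockers=[x, y]))
inputs = [(var, val) for var, val in givens.values() if var in param_ancestors]
input_vars = [var for var, _ in inputs]
input_vals = [val for _, val in inputs]

func = theano.function(input_vars, param)
assert input_vars == [x]          # y was filtered out
assert func(*input_vals) == 3.0
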

@@ -515,7 +549,11 @@ def to_tuple(shape):
"""Convert ints, arrays, and Nones to tuples"""
if shape is None:
return tuple()
return tuple(np.atleast_1d(shape))
temp = np.atleast_1d(shape)
if temp.size == 0:
return tuple()
else:
return tuple(temp)
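
A few example calls against a copy of the patched helper make the new empty-shape handling concrete:

import numpy as np

def to_tuple(shape):
    """Convert ints, arrays, and Nones to tuples (copy of the patched helper)."""
    if shape is None:
        return tuple()
    temp = np.atleast_1d(shape)
    if temp.size == 0:
        return tuple()
    else:
        return tuple(temp)

assert to_tuple(None) == ()
assert to_tuple(3) == (3,)
assert to_tuple((2, 4)) == (2, 4)
assert to_tuple([]) == ()   # empty shapes now collapse to (), the new branch
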

def _is_one_d(dist_shape):
if hasattr(dist_shape, 'dshape') and dist_shape.dshape in ((), (0,), (1,)):
177 changes: 142 additions & 35 deletions pymc3/distributions/mixture.py
@@ -5,7 +5,8 @@
from ..math import logsumexp
from .dist_math import bound, random_choice
from .distribution import (Discrete, Distribution, draw_values,
generate_samples, _DrawValuesContext)
generate_samples, _DrawValuesContext,
_DrawValuesContextBlocker, to_tuple)
from .continuous import get_tau_sigma, Normal


@@ -102,6 +103,35 @@ def __init__(self, w, comp_dists, *args, **kwargs):

super().__init__(shape, dtype, defaults=defaults, *args, **kwargs)

@property
def comp_dists(self):
return self._comp_dists

@comp_dists.setter
def comp_dists(self, _comp_dists):
self._comp_dists = _comp_dists
# Test whether the comp_dists can call random with a non-None size
with _DrawValuesContextBlocker():
if isinstance(self.comp_dists, (list, tuple)):
try:
[comp_dist.random(size=23)
for comp_dist in self.comp_dists]
self._comp_dists_vect = True
except Exception:
# The comp_dists cannot call random with a non-None size or
# without knowledge of the point, so we assume that we will
# have to iterate calls to random to get the correct size
self._comp_dists_vect = False
else:
try:
self.comp_dists.random(size=23)
self._comp_dists_vect = True
except Exception:
# The comp_dists cannot call random with a non-None size or
# without knowledge of the point, so we assume that we will
# have to iterate calls to random to get the correct size
self._comp_dists_vect = False
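
The setter above probes whether every component can draw with an explicit size and records the result in _comp_dists_vect. A standalone sketch of that probe (FakeComponent is a hypothetical stand-in for a component distribution):

import numpy as np

class FakeComponent:
    """Hypothetical component; only vectorised ones accept an explicit size."""
    def __init__(self, vectorised):
        self.vectorised = vectorised

    def random(self, point=None, size=None):
        if size is not None and not self.vectorised:
            raise ValueError("cannot draw with an explicit size")
        return np.zeros(size if size is not None else ())

def supports_vectorised_random(comp_dist):
    # Same cheap probe the comp_dists setter uses: any exception means
    # we will have to iterate single draws later.
    try:
        comp_dist.random(size=23)
        return True
    except Exception:
        return False

assert supports_vectorised_random(FakeComponent(vectorised=True))
assert not supports_vectorised_random(FakeComponent(vectorised=False))
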

def _comp_logp(self, value):
comp_dists = self.comp_dists

@@ -131,13 +161,33 @@ def _comp_modes(self):
axis=1))

def _comp_samples(self, point=None, size=None):
try:
samples = self.comp_dists.random(point=point, size=size)
except AttributeError:
samples = np.column_stack([comp_dist.random(point=point, size=size)
for comp_dist in self.comp_dists])

return np.squeeze(samples)
if self._comp_dists_vect or size is None:
try:
return self.comp_dists.random(point=point, size=size)
except AttributeError:
samples = np.array([comp_dist.random(point=point, size=size)
for comp_dist in self.comp_dists])
samples = np.moveaxis(samples, 0, samples.ndim - 1)
else:
# We must iterate the calls to random manually
size = to_tuple(size)
_size = int(np.prod(size))
try:
samples = np.array([self.comp_dists.random(point=point,
size=None)
for _ in range(_size)])
samples = np.reshape(samples, size + samples.shape[1:])
except AttributeError:
samples = np.array([[comp_dist.random(point=point, size=None)
for _ in range(_size)]
for comp_dist in self.comp_dists])
samples = np.moveaxis(samples, 0, samples.ndim - 1)
samples = np.reshape(samples, size + samples.shape[1:])

if samples.shape[-1] == 1:
return samples[..., 0]
else:
return samples
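
To make the shape bookkeeping in the fallback branch above concrete, here is a toy numpy reproduction of the iterate-and-stack path for three scalar components (draw_scalar stands in for comp_dist.random(point=point, size=None)):

import numpy as np

rng = np.random.RandomState(0)

def draw_scalar(loc):
    # Stand-in for a single comp_dist.random(point=point, size=None) call
    return rng.normal(loc)

size = (2, 4)
_size = int(np.prod(size))
component_means = [-1.0, 0.0, 1.0]

samples = np.array([[draw_scalar(loc) for _ in range(_size)]
                    for loc in component_means])      # (n_comp, _size)
samples = np.moveaxis(samples, 0, samples.ndim - 1)   # (_size, n_comp)
samples = np.reshape(samples, size + samples.shape[1:])
assert samples.shape == (2, 4, 3)                     # size + (n_comp,)
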

def logp(self, value):
w = self.w
@@ -147,42 +197,99 @@ def logp(self, value):
broadcast_conditions=False)

def random(self, point=None, size=None):
# Convert size to tuple
size = to_tuple(size)
# Draw mixture weights and a sample from each mixture to infer shape
with _DrawValuesContext() as draw_context:
w = draw_values([self.w], point=point)[0]
# We first need to check the w and comp_tmp shapes and recompute size
w = draw_values([self.w], point=point, size=size)[0]
with _DrawValuesContextBlocker():
# We don't want to store the values drawn here in the context
# because they won't have the correct size
comp_tmp = self._comp_samples(point=point, size=None)
if np.asarray(self.shape).size == 0:
distshape = np.asarray(np.broadcast(w, comp_tmp).shape)[..., :-1]

# When size is not None, it's hard to tell the w parameter shape
if size is not None and w.shape[:len(size)] == size:
w_shape = w.shape[len(size):]
else:
w_shape = w.shape

# Try to determine parameter shape and dist_shape
param_shape = np.broadcast(np.empty(w_shape),
comp_tmp).shape
if np.asarray(self.shape).size != 0:
dist_shape = np.broadcast(np.empty(self.shape),
np.empty(param_shape[:-1])).shape
else:
dist_shape = param_shape[:-1]

# When size is not None, maybe dist_shape partially overlaps with size
if size is not None:
if size == dist_shape:
size = None
elif size[-len(dist_shape):] == dist_shape:
size = size[:len(size) - len(dist_shape)]

# We get an integer _size instead of a tuple size for drawing the
# mixture, then we just reshape the output
if size is None:
_size = None
else:
distshape = np.asarray(self.shape)
_size = int(np.prod(size))

# Now we must broadcast w to the shape that considers size, dist_shape
# and param_shape. However, we must take care with the cases in which
# dist_shape and param_shape overlap
if size is not None and w.shape[:len(size)] == size:
if w.shape[:len(size + dist_shape)] != (size + dist_shape):
# To allow w to broadcast, we insert new axis in between the
# "size" axis and the "mixture" axis
_w = w[(slice(None),) * len(size) + # Index the size axis
(np.newaxis,) * len(dist_shape) + # Add new axis for the dist_shape
(slice(None),)] # Close with the slice of mixture components
w = np.broadcast_to(_w, size + dist_shape + (param_shape[-1],))
elif size is not None:
w = np.broadcast_to(w, size + dist_shape + (param_shape[-1],))
else:
w = np.broadcast_to(w, dist_shape + (param_shape[-1],))

# Normalize inputs
w /= w.sum(axis=-1, keepdims=True)
# Compute the total size of the mixture's random call with size
if _size is not None:
output_size = int(_size * np.prod(dist_shape) * param_shape[-1])
else:
output_size = int(np.prod(dist_shape) * param_shape[-1])
# Get the size we need for the mixture's random call
mixture_size = int(output_size // np.prod(comp_tmp.shape))
if mixture_size == 1 and _size is None:
mixture_size = None

# Semiflatten the mixture weights. The last axis is the number of
# mixture components, and the rest is all about size,
# dist_shape and broadcasting
w = np.reshape(w, (-1, w.shape[-1]))
# Normalize mixture weights
w = w / w.sum(axis=-1, keepdims=True)

w_samples = generate_samples(random_choice,
p=w,
broadcast_shape=w.shape[:-1] or (1,),
dist_shape=distshape,
size=size).squeeze()
if (size is None) or (distshape.size == 0):
with draw_context:
comp_samples = self._comp_samples(point=point, size=size)
if comp_samples.ndim > 1:
samples = np.squeeze(comp_samples[np.arange(w_samples.size), ..., w_samples])
else:
samples = np.squeeze(comp_samples[w_samples])
dist_shape=w.shape[:-1] or (1,),
size=size)
# Sample from the mixture
with draw_context:
mixed_samples = self._comp_samples(point=point,
size=mixture_size)
w_samples = w_samples.flatten()
# Semiflatten the mixture to be able to zip it with w_samples
mixed_samples = np.reshape(mixed_samples, (-1, comp_tmp.shape[-1]))
# Select the samples from the mixture
samples = np.array([mixed[choice] for choice, mixed in
zip(w_samples, mixed_samples)])
# Reshape the samples to the correct output shape
if size is None:
samples = np.reshape(samples, dist_shape)
else:
if w_samples.ndim == 1:
w_samples = np.reshape(np.tile(w_samples, size), (size,) + w_samples.shape)
samples = np.zeros((size,)+tuple(distshape))
with draw_context:
for i in range(size):
w_tmp = w_samples[i, :]
comp_tmp = self._comp_samples(point=point, size=None)
if comp_tmp.ndim > 1:
samples[i, :] = np.squeeze(comp_tmp[np.arange(w_tmp.size), ..., w_tmp])
else:
samples[i, :] = np.squeeze(comp_tmp[w_tmp])

samples = np.reshape(samples, size + dist_shape)
return samples
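
Rough end-to-end usage that this fix targets, sketched against the pymc3 3.6-era API (sd parameter, sample_prior_predictive); this is not the PR's own test case. Prior predictive draws from a Mixture should come back with the requested number of samples:

import numpy as np
import pymc3 as pm

# pymc3 ~3.6 API assumed; parameter names may differ in later releases
with pm.Model():
    w = pm.Dirichlet('w', a=np.ones(2))
    comps = pm.Normal.dist(mu=np.array([-5., 5.]), sd=1., shape=(2,))
    mix = pm.Mixture('mix', w=w, comp_dists=comps)
    prior = pm.sample_prior_predictive(samples=100)

print(prior['mix'].shape)  # expected: (100,)
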

