diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 89154af9e9..14f98a2a76 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -12,6 +12,7 @@ - Used `numpy.vectorize` in `distributions.distribution._compile_theano_function`. This enables `sample_prior_predictive` and `sample_posterior_predictive` to ask for tuples of samples instead of just integers. This fixes issue #3422. ### Maintenance +- Fixed an issue in `model_graph` that caused construction of the graph of the model for rendering to hang: replaced a search over the powerset of the nodes with a breadth-first search over the nodes. Fix for #3458. - All occurances of `sd` as a parameter name have been renamed to `sigma`. `sd` will continue to function for backwards compatibility. - Made `BrokenPipeError` for parallel sampling more verbose on Windows. - Added the `broadcast_distribution_samples` function that helps broadcasting arrays of drawn samples, taking into account the requested `size` and the inferred distribution shape. This sometimes is needed by distributions that call several `rvs` separately within their `random` method, such as the `ZeroInflatedPoisson` (Fix issue #3310). diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py index e8537e9908..0ca440c521 100644 --- a/pymc3/model_graph.py +++ b/pymc3/model_graph.py @@ -1,11 +1,18 @@ import itertools +from collections import deque +from typing import Iterator, Optional, MutableSet -from theano.gof.graph import ancestors +from theano.gof.graph import stack_search from theano.compile import SharedVariable +from theano.tensor import Tensor from .util import get_default_varnames import pymc3 as pm +# this is a placeholder for a better characterization of the type +# of variables in a model. +RV = Tensor + def powerset(iterable): """All *nonempty* subsets of an iterable. @@ -27,7 +34,7 @@ def __init__(self, model): self._deterministics = None def get_deterministics(self, var): - """Compute the deterministic nodes of the graph""" + """Compute the deterministic nodes of the graph, **not** including var itself.""" deterministics = [] attrs = ('transformed', 'logpt') for v in self.var_list: @@ -35,29 +42,33 @@ def get_deterministics(self, var): deterministics.append(v) return deterministics - def _ancestors(self, var, func, blockers=None): - """Get ancestors of a function that are also named PyMC3 variables""" - return set([j for j in ancestors([func], blockers=blockers) if j in self.var_list and j != var]) + def _get_ancestors(self, var, func) -> MutableSet[RV]: + """Get all ancestors of a function, doing some accounting for deterministics. + """ - def _get_ancestors(self, var, func): - """Get all ancestors of a function, doing some accounting for deterministics + # this contains all of the variables in the model EXCEPT var... + vars: MutableSet[RV] = set(self.var_list) + vars.remove(var) + + blockers: MutableSet[RV] = set() + retval = set() + def _expand(node) -> Optional[Iterator[Tensor]]: + if node in blockers: + return None + elif node in vars: + blockers.add(node) + retval.add(node) + return None + elif node.owner: + blockers.add(node) + return reversed(node.owner.inputs) + else: + return None - Specifically, if a deterministic is an input, theano.gof.graph.ancestors will - return only the inputs *to the deterministic*. However, if we pass in the - deterministic as a blocker, it will skip those nodes. - """ - deterministics = self.get_deterministics(var) - upstream = self._ancestors(var, func) - - # Usual case - if upstream == self._ancestors(var, func, blockers=upstream): - return upstream - else: # deterministic accounting - for d in powerset(upstream): - blocked = self._ancestors(var, func, blockers=d) - if set(d) == blocked: - return d - raise RuntimeError('Could not traverse graph. Consider raising an issue with developers.') + stack_search(start = deque([func]), + expand=_expand, + mode='bfs') + return retval def _filter_parents(self, var, parents): """Get direct parents of a var, as strings"""