Skip to content

Commit dfc9102

Browse files
Fixed leaf_nodes computation (#3645)
* Fixed leaf_nodes computation * Refactored get_named_nodes_and_relations to make it consistent with theano naming * Added test for #3643 * Added release notes * Fixed float32 error * Applied changes suggested by @rpgoldman * Fixed ignored nodes bug * Modify docstrings for numpy style. Noticed @junpenglao's comment and modified for numpy formatting. * Changed leaves to leaf_dict as per @junpeglao's suggestion Co-authored-by: rpgoldman <[email protected]>
1 parent a164f72 commit dfc9102

File tree

4 files changed

+145
-67
lines changed

4 files changed

+145
-67
lines changed

RELEASE-NOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
- Added `theano.gof.graph.Constant` to type checks done in `_draw_value` (fixes issue [3595](https://github.com/pymc-devs/pymc3/issues/3595))
4343
- `HalfNormal` did not used to work properly in `draw_values`, `sample_prior_predictive`, or `sample_posterior_predictive` (fixes issue [3686](https://github.com/pymc-devs/pymc3/pull/3686))
4444
- Random variable transforms were inadvertently left out of the API documentation. Added them. (See PR [3690](https://github.com/pymc-devs/pymc3/pull/3690)).
45+
- Refactored `pymc3.model.get_named_nodes_and_relations` to use the ancestors and descendents, in a way that is consistent with `theano`'s naming convention.
46+
- Changed the way in which `pymc3.model.get_named_nodes_and_relations` computes nodes without ancestors to make it robust to changes in var_name orderings (issue [#3643](https://github.com/pymc-devs/pymc3/issues/3643))
4547

4648
## PyMC3 3.7 (May 29 2019)
4749

pymc3/distributions/distribution.py

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import theano
2121
from ..memoize import memoize
2222
from ..model import (
23-
Model, get_named_nodes_and_relations, FreeRV,
23+
Model, build_named_node_tree, FreeRV,
2424
ObservedRV, MultiObservedRV, ContextMeta
2525
)
2626
from ..vartypes import string_types, theano_constant
@@ -569,31 +569,19 @@ def draw_values(params, point=None, size=None):
569569
# Distribution parameters may be nodes which have named node-inputs
570570
# specified in the point. Need to find the node-inputs, their
571571
# parents and children to replace them.
572-
leaf_nodes = {}
573-
named_nodes_parents = {}
574-
named_nodes_children = {}
575-
for _, param in symbolic_params:
576-
if hasattr(param, 'name'):
577-
# Get the named nodes under the `param` node
578-
nn, nnp, nnc = get_named_nodes_and_relations(param)
579-
leaf_nodes.update(nn)
580-
# Update the discovered parental relationships
581-
for k in nnp.keys():
582-
if k not in named_nodes_parents.keys():
583-
named_nodes_parents[k] = nnp[k]
584-
else:
585-
named_nodes_parents[k].update(nnp[k])
586-
# Update the discovered child relationships
587-
for k in nnc.keys():
588-
if k not in named_nodes_children.keys():
589-
named_nodes_children[k] = nnc[k]
590-
else:
591-
named_nodes_children[k].update(nnc[k])
572+
leaf_nodes, named_nodes_descendents, named_nodes_ancestors = (
573+
build_named_node_tree(
574+
(
575+
param for _, param in symbolic_params
576+
if hasattr(param, "name")
577+
)
578+
)
579+
)
592580

593581
# Init givens and the stack of nodes to try to `_draw_value` from
594582
givens = {p.name: (p, v) for (p, size), v in drawn.items()
595583
if getattr(p, 'name', None) is not None}
596-
stack = list(leaf_nodes.values()) # A queue would be more appropriate
584+
stack = list(leaf_nodes.values())
597585
while stack:
598586
next_ = stack.pop(0)
599587
if (next_, size) in drawn:
@@ -614,7 +602,7 @@ def draw_values(params, point=None, size=None):
614602
# of TensorConstants or SharedVariables, we must add them
615603
# to the stack or risk evaluating deterministics with the
616604
# wrong values (issue #3354)
617-
stack.extend([node for node in named_nodes_parents[next_]
605+
stack.extend([node for node in named_nodes_descendents[next_]
618606
if isinstance(node, (ObservedRV,
619607
MultiObservedRV))
620608
and (node, size) not in drawn])
@@ -623,7 +611,7 @@ def draw_values(params, point=None, size=None):
623611
# If the node does not have a givens value, try to draw it.
624612
# The named node's children givens values must also be taken
625613
# into account.
626-
children = named_nodes_children[next_]
614+
children = named_nodes_ancestors[next_]
627615
temp_givens = [givens[k] for k in givens if k in children]
628616
try:
629617
# This may fail for autotransformed RVs, which don't
@@ -638,7 +626,7 @@ def draw_values(params, point=None, size=None):
638626
# The node failed, so we must add the node's parents to
639627
# the stack of nodes to try to draw from. We exclude the
640628
# nodes in the `params` list.
641-
stack.extend([node for node in named_nodes_parents[next_]
629+
stack.extend([node for node in named_nodes_descendents[next_]
642630
if node is not None and
643631
(node, size) not in drawn])
644632

@@ -662,8 +650,8 @@ def draw_values(params, point=None, size=None):
662650
# This may set values for certain nodes in the drawn
663651
# dictionary, but they don't get added to the givens
664652
# dictionary. Here, we try to fix that.
665-
if param in named_nodes_children:
666-
for node in named_nodes_children[param]:
653+
if param in named_nodes_ancestors:
654+
for node in named_nodes_ancestors[param]:
667655
if (
668656
node.name not in givens and
669657
(node, size) in drawn

pymc3/model.py

Lines changed: 102 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def incorporate_methods(source, destination, methods,
105105
else:
106106
setattr(destination, method, None)
107107

108+
108109
def get_named_nodes_and_relations(graph):
109110
"""Get the named nodes in a theano graph (i.e., nodes whose name
110111
attribute is not None) along with their relationships (i.e., the
@@ -113,67 +114,128 @@ def get_named_nodes_and_relations(graph):
113114
114115
Parameters
115116
----------
116-
graph - a theano node
117+
graph: a theano node
117118
118119
Returns:
119-
leaf_nodes: A dictionary of name:node pairs, of the named nodes that
120-
are also leafs of the graph
121-
node_parents: A dictionary of node:set([parents]) pairs. Each key is
122-
a theano named node, and the corresponding value is the set of
123-
theano named nodes that are parents of the node. These parental
124-
relations skip unnamed intermediate nodes.
125-
node_children: A dictionary of node:set([children]) pairs. Each key
120+
--------
121+
leaf_dict: Dict[str, node]
122+
A dictionary of name:node pairs, of the named nodes that
123+
have no named ancestors in the provided theano graph.
124+
descendents: Dict[node, Set[node]]
125+
Each key is a theano named node, and the corresponding value
126+
is the set of theano named nodes that are descendents with no
127+
intervening named nodes in the supplied ``graph``.
128+
ancestors: Dict[node, Set[node]]
129+
A dictionary of node:set([ancestors]) pairs. Each key
126130
is a theano named node, and the corresponding value is the set
127-
of theano named nodes that are children of the node. These child
128-
relations skip unnamed intermediate nodes.
131+
of theano named nodes that are ancestors with no intervening named
132+
nodes in the supplied ``graph``.
129133
130134
"""
135+
# We don't enforce distribution parameters to have a name but we may
136+
# attempt to get_named_nodes_and_relations from them anyway in
137+
# distributions.draw_values. This means that must take care only to add
138+
# graph to the ancestors and descendents dictionaries if it has a name.
131139
if graph.name is not None:
132-
node_parents = {graph: set()}
133-
node_children = {graph: set()}
140+
ancestors = {graph: set()}
141+
descendents = {graph: set()}
134142
else:
135-
node_parents = {}
136-
node_children = {}
137-
return _get_named_nodes_and_relations(graph, None, {}, node_parents, node_children)
138-
139-
def _get_named_nodes_and_relations(graph, parent, leaf_nodes,
140-
node_parents, node_children):
143+
ancestors = {}
144+
descendents = {}
145+
descendents, ancestors = _get_named_nodes_and_relations(
146+
graph, None, ancestors, descendents
147+
)
148+
leaf_dict = {
149+
node.name: node for node, ancestor in ancestors.items()
150+
if len(ancestor) == 0
151+
}
152+
return leaf_dict, descendents, ancestors
153+
154+
155+
def _get_named_nodes_and_relations(graph, descendent, descendents, ancestors):
141156
if getattr(graph, 'owner', None) is None: # Leaf node
142157
if graph.name is not None: # Named leaf node
143-
leaf_nodes.update({graph.name: graph})
144-
if parent is not None: # Is None for the root node
158+
if descendent is not None: # Is None for the first node
145159
try:
146-
node_parents[graph].add(parent)
160+
descendents[graph].add(descendent)
147161
except KeyError:
148-
node_parents[graph] = {parent}
149-
node_children[parent].add(graph)
162+
descendents[graph] = {descendent}
163+
ancestors[descendent].add(graph)
150164
else:
151-
node_parents[graph] = set()
165+
descendents[graph] = set()
152166
# Flag that the leaf node has no children
153-
node_children[graph] = set()
167+
ancestors[graph] = set()
154168
else: # Intermediate node
155169
if graph.name is not None: # Intermediate named node
156-
if parent is not None: # Is only None for the root node
170+
if descendent is not None: # Is only None for the root node
157171
try:
158-
node_parents[graph].add(parent)
172+
descendents[graph].add(descendent)
159173
except KeyError:
160-
node_parents[graph] = {parent}
161-
node_children[parent].add(graph)
174+
descendents[graph] = {descendent}
175+
ancestors[descendent].add(graph)
162176
else:
163-
node_parents[graph] = set()
164-
# The current node will be set as the parent of the next
177+
descendents[graph] = set()
178+
# The current node will be set as the descendent of the next
165179
# nodes only if it is a named node
166-
parent = graph
180+
descendent = graph
167181
# Init the nodes children to an empty set
168-
node_children[graph] = set()
182+
ancestors[graph] = set()
169183
for i in graph.owner.inputs:
170-
temp_nodes, temp_inter, temp_tree = \
171-
_get_named_nodes_and_relations(i, parent, leaf_nodes,
172-
node_parents, node_children)
173-
leaf_nodes.update(temp_nodes)
174-
node_parents.update(temp_inter)
175-
node_children.update(temp_tree)
176-
return leaf_nodes, node_parents, node_children
184+
temp_desc, temp_ances = _get_named_nodes_and_relations(
185+
i, descendent, descendents, ancestors
186+
)
187+
descendents.update(temp_desc)
188+
ancestors.update(temp_ances)
189+
return descendents, ancestors
190+
191+
192+
def build_named_node_tree(graphs):
193+
"""Build the combined descence/ancestry tree of named nodes (i.e., nodes
194+
whose name attribute is not None) in a list (or iterable) of theano graphs.
195+
The relationship tree does not include unnamed intermediate nodes present
196+
in the supplied graphs.
197+
198+
Parameters
199+
----------
200+
graphs - iterable of theano graphs
201+
202+
Returns:
203+
--------
204+
leaf_dict: Dict[str, node]
205+
A dictionary of name:node pairs, of the named nodes that
206+
have no named ancestors in the provided theano graphs.
207+
descendents: Dict[node, Set[node]]
208+
A dictionary of node:set([parents]) pairs. Each key is
209+
a theano named node, and the corresponding value is the set of
210+
theano named nodes that are descendents with no intervening named
211+
nodes in the supplied ``graphs``.
212+
ancestors: Dict[node, Set[node]]
213+
A dictionary of node:set([ancestors]) pairs. Each key
214+
is a theano named node, and the corresponding value is the set
215+
of theano named nodes that are ancestors with no intervening named
216+
nodes in the supplied ``graphs``.
217+
218+
"""
219+
leaf_dict = {}
220+
named_nodes_descendents = {}
221+
named_nodes_ancestors = {}
222+
for graph in graphs:
223+
# Get the named nodes under the `param` node
224+
nn, nnd, nna = get_named_nodes_and_relations(graph)
225+
leaf_dict.update(nn)
226+
# Update the discovered parental relationships
227+
for k in nnd.keys():
228+
if k not in named_nodes_descendents.keys():
229+
named_nodes_descendents[k] = nnd[k]
230+
else:
231+
named_nodes_descendents[k].update(nnd[k])
232+
# Update the discovered child relationships
233+
for k in nna.keys():
234+
if k not in named_nodes_ancestors.keys():
235+
named_nodes_ancestors[k] = nna[k]
236+
else:
237+
named_nodes_ancestors[k].update(nna[k])
238+
return leaf_dict, named_nodes_descendents, named_nodes_ancestors
177239

178240
T = TypeVar('T', bound='ContextMeta')
179241

pymc3/tests/test_sampling.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,32 @@ def test_deterministic_of_observed(self):
436436
rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3
437437
assert np.allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol)
438438

439+
def test_var_name_order_invariance(self):
440+
# Issue #3643 exposed a bug in sample_posterior_predictive, which made
441+
# it return incorrect values depending on the order of the supplied
442+
# var_names. This tests that sample_posterior_predictive is robust
443+
# to different var_names orders.
444+
obs_a = theano.shared(pm.theanof.floatX(np.array([10., 20., 30.])))
445+
with pm.Model() as m:
446+
pm.Normal('mu', 3, 5)
447+
a = pm.Normal('a', 20, 10, observed=obs_a)
448+
pm.Deterministic('b', a * 2)
449+
trace = pm.sample(10)
450+
451+
np.random.seed(123)
452+
var_names = ['b', 'a']
453+
ppc1 = pm.sample_posterior_predictive(
454+
trace, model=m, var_names=var_names
455+
)
456+
np.random.seed(123)
457+
var_names = ['a', 'b']
458+
ppc2 = pm.sample_posterior_predictive(
459+
trace, model=m, var_names=var_names
460+
)
461+
assert np.all(ppc1["a"] == ppc2["a"])
462+
assert np.all(ppc1["b"] == ppc2["b"])
463+
assert np.allclose(ppc1["b"], (2 * ppc1["a"]))
464+
439465
def test_deterministic_of_observed_modified_interface(self):
440466
meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(100))
441467
meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(100))

0 commit comments

Comments
 (0)