10
10
from pymc .distributions .discrete import Bernoulli , Categorical , DiscreteUniform
11
11
from pymc .distributions .transforms import Chain
12
12
from pymc .logprob .abstract import _logprob
13
- from pymc .logprob .basic import conditional_logp
13
+ from pymc .logprob .basic import conditional_logp , logp
14
14
from pymc .logprob .transforms import IntervalTransform
15
15
from pymc .model import Model
16
16
from pymc .pytensorf import compile_pymc , constant_fold , inputvars
17
17
from pymc .util import _get_seeds_per_chain , dataset_to_point_list , treedict
18
- from pytensor import Mode
18
+ from pytensor import Mode , scan
19
19
from pytensor .compile import SharedVariable
20
20
from pytensor .compile .builders import OpFromGraph
21
- from pytensor .graph import (
22
- Constant ,
23
- FunctionGraph ,
24
- ancestors ,
25
- clone_replace ,
26
- vectorize_graph ,
27
- )
21
+ from pytensor .graph import Constant , FunctionGraph , ancestors , clone_replace
22
+ from pytensor .graph .replace import vectorize_graph
28
23
from pytensor .scan import map as scan_map
29
24
from pytensor .tensor import TensorType , TensorVariable
30
25
from pytensor .tensor .elemwise import Elemwise
33
28
34
29
__all__ = ["MarginalModel" ]
35
30
31
+ from pymc_experimental .distributions import DiscreteMarkovChain
32
+
36
33
37
34
class MarginalModel (Model ):
38
35
"""Subclass of PyMC Model that implements functionality for automatic
@@ -245,16 +242,25 @@ def marginalize(
245
242
self [var ] if isinstance (var , str ) else var for var in rvs_to_marginalize
246
243
]
247
244
248
- supported_dists = (Bernoulli , Categorical , DiscreteUniform )
249
245
for rv_to_marginalize in rvs_to_marginalize :
250
246
if rv_to_marginalize not in self .free_RVs :
251
247
raise ValueError (
252
248
f"Marginalized RV { rv_to_marginalize } is not a free RV in the model"
253
249
)
254
- if not isinstance (rv_to_marginalize .owner .op , supported_dists ):
250
+
251
+ rv_op = rv_to_marginalize .owner .op
252
+ if isinstance (rv_op , DiscreteMarkovChain ):
253
+ if rv_op .n_lags > 1 :
254
+ raise NotImplementedError (
255
+ "Marginalization for DiscreteMarkovChain with n_lags > 1 is not supported"
256
+ )
257
+ if rv_to_marginalize .owner .inputs [0 ].type .ndim > 2 :
258
+ raise NotImplementedError (
259
+ "Marginalization for DiscreteMarkovChain with non-matrix transition probability is not supported"
260
+ )
261
+ elif not isinstance (rv_op , (Bernoulli , Categorical , DiscreteUniform )):
255
262
raise NotImplementedError (
256
- f"RV with distribution { rv_to_marginalize .owner .op } cannot be marginalized. "
257
- f"Supported distribution include { supported_dists } "
263
+ f"Marginalization of RV with distribution { rv_to_marginalize .owner .op } is not supported"
258
264
)
259
265
260
266
if rv_to_marginalize .name in self .named_vars_to_dims :
@@ -490,6 +496,10 @@ class FiniteDiscreteMarginalRV(MarginalRV):
490
496
"""Base class for Finite Discrete Marginalized RVs"""
491
497
492
498
499
+ class DiscreteMarginalMarkovChainRV (MarginalRV ):
500
+ """Base class for marginalized RVs whose marginalized variable is a DiscreteMarkovChain (logp: ``marginal_hmm_logp``)."""
501
+
502
+
493
503
def static_shape_ancestors (vars ):
494
504
"""Identify ancestors Shape Ops of static shapes (therefore constant in a valid graph)."""
495
505
return [
@@ -618,11 +628,17 @@ def replace_finite_discrete_marginal_subgraph(fgraph, rv_to_marginalize, all_rvs
618
628
replace_inputs .update ({input_rv : input_rv .type () for input_rv in input_rvs })
619
629
cloned_outputs = clone_replace (outputs , replace = replace_inputs )
620
630
621
- marginalization_op = FiniteDiscreteMarginalRV (
631
+ if isinstance (rv_to_marginalize .owner .op , DiscreteMarkovChain ):
632
+ marginalize_constructor = DiscreteMarginalMarkovChainRV
633
+ else :
634
+ marginalize_constructor = FiniteDiscreteMarginalRV
635
+
636
+ marginalization_op = marginalize_constructor (
622
637
inputs = list (replace_inputs .values ()),
623
638
outputs = cloned_outputs ,
624
639
ndim_supp = ndim_supp ,
625
640
)
641
+
626
642
marginalized_rvs = marginalization_op (* replace_inputs .keys ())
627
643
fgraph .replace_all (tuple (zip (rvs_to_marginalize , marginalized_rvs )))
628
644
return rvs_to_marginalize , marginalized_rvs
@@ -638,6 +654,9 @@ def get_domain_of_finite_discrete_rv(rv: TensorVariable) -> Tuple[int, ...]:
638
654
elif isinstance (op , DiscreteUniform ):
639
655
lower , upper = constant_fold (rv .owner .inputs [3 :])
640
656
return tuple (range (lower , upper + 1 ))
657
+ elif isinstance (op , DiscreteMarkovChain ):
658
+ p = rv .owner .inputs [0 ]
659
+ return tuple (range (pt .get_vector_length (p [- 1 ])))
641
660
642
661
raise NotImplementedError (f"Cannot compute domain for op { op } " )
643
662
@@ -728,3 +747,69 @@ def logp_fn(marginalized_rv_const, *non_sequences):
728
747
729
748
# We have to add dummy logps for the remaining value variables, otherwise PyMC will raise
730
749
return joint_logps , * (pt .constant (0 ),) * (len (values ) - 1 )
750
+
751
+
752
@_logprob.register(DiscreteMarginalMarkovChainRV)
def marginal_hmm_logp(op, values, *inputs, **kwargs):
    """Joint logp of the dependent variables with the discrete Markov chain summed out.

    Registered as the ``_logprob`` implementation for
    ``DiscreteMarginalMarkovChainRV``. The chain is marginalized with the
    forward algorithm, carried out entirely in log space.

    Parameters
    ----------
    op
        The marginal op; its inner graph is rebuilt in terms of ``inputs``.
    values
        Value variables, paired in order with the RVs that depend on the chain.
    *inputs
        Outer inputs of the op, substituted for its inner inputs.
    **kwargs
        Accepted for dispatch compatibility; not used here.

    Returns
    -------
    tuple
        The joint logp first, followed by ``len(values) - 1`` zero constants
        so that one logp term is returned per value variable.
    """
    # Re-express the op's inner outputs in terms of the outer inputs.
    outer_node = op.make_node(*inputs)
    inner_to_outer = dict(zip(op.inner_inputs, outer_node.inputs))
    chain_rv, *dependent_rvs = clone_replace(op.inner_outputs, replace=inner_to_outer)

    # The chain RV is expected to have exactly four inputs; only the transition
    # probabilities and the initial distribution are needed below.
    P, _n_steps, init_dist, _rng = chain_rv.owner.inputs
    states = pt.arange(P.shape[-1], dtype="int32")

    # ------------------------------------------------------------------
    # Part 1: logp of the data ("emissions") under every possible state.
    # ------------------------------------------------------------------
    # The dependent RVs are re-pointed at a clone of the chain variable so that
    # the emission logp graph no longer reaches the initial-distribution random
    # variable, which is irrelevant at this stage and would otherwise be
    # detected as a stray RV in the logp graph.
    chain_value = chain_rv.clone()
    dependent_rvs = clone_replace(dependent_rvs, replace={chain_rv: chain_value})
    emission_logps = conditional_logp(dict(zip(dependent_rvs, values)))

    # Collapse/add batch dims beyond those of the chain.
    reduced_emission_logp = _add_reduce_batch_dependent_logps(
        chain_rv.type, emission_logps.values()
    )

    # Evaluate the emission logp once per state: build a chain-shaped value for
    # each state and put that new "state" axis in front.
    chain_shape = constant_fold(tuple(chain_rv.shape))
    stacked_states = pt.full((*chain_shape, states.size), states)
    batch_chain_value = pt.moveaxis(stacked_states, -1, 0)
    batch_logp_emissions = vectorize_graph(
        reduced_emission_logp, {chain_value: batch_chain_value}
    )

    # ------------------------------------------------------------------
    # Part 2: forward algorithm,
    #   alpha_t = p(y | s_t) * sum_{s_{t-1}} p(s_t | s_{t-1}) * alpha_{t-1},
    # evaluated in logs.
    # ------------------------------------------------------------------
    # Prior over states: logp of every candidate state under the initial
    # distribution, whatever distribution the user supplied.
    def init_logp(state):
        return logp(init_dist, state)

    first_step_states = batch_chain_value[..., 0]
    batch_logp_init_dist = pt.vectorize(init_logp, "()->()")(first_step_states)
    log_alpha_init = batch_logp_init_dist + batch_logp_emissions[..., 0]

    def forward_step(step_logp_emission, prev_log_alpha, log_trans):
        # Sum (in log space) over the previous state, then add this step's emission.
        return step_logp_emission + pt.logsumexp(
            prev_log_alpha[:, None] + log_trans, axis=0
        )

    # Right-pad log(P) so it broadcasts against any extra chain batch dims.
    n_pad = (len(chain_shape) - 1) - (P.type.ndim - 2)
    log_P = pt.shape_padright(pt.log(P), n_pad)

    # Scan iterates over the leading axis, so time is moved to the front; the
    # first step is dropped because it was consumed by ``log_alpha_init``.
    remaining_emissions = pt.moveaxis(batch_logp_emissions[..., 1:], -1, 0)
    log_alpha_seq, _ = scan(
        forward_step,
        sequences=remaining_emissions,
        outputs_info=[log_alpha_init],
        non_sequences=[log_P],
    )

    # Marginal logp: log-sum-exp over states of the last forward message.
    joint_logp = pt.logsumexp(log_alpha_seq[-1], axis=0)

    # PyMC expects one logp per value variable. The first entry already holds
    # the joint probability of everything, so the rest are zero placeholders.
    return (joint_logp,) + (pt.constant(0),) * (len(values) - 1)
0 commit comments