@@ -502,9 +502,6 @@ def logp_fn(marginalized_rv_const, *non_sequences):
 
 @_logprob.register(DiscreteMarginalMarkovChainRV)
 def finite_discrete_marginal_rv_logp(op, values, *inputs, **kwargs):
-    def step_alpha(log_alpha, log_P):
-        return pt.logsumexp(log_alpha[:, None] + log_P, 0)
-
     def eval_logp(x):
         return logp(init_dist_, x)
 
@@ -520,7 +517,9 @@ def eval_logp(x):
     domain = pt.arange(P_.shape[0], dtype="int32")
 
     vec_eval_logp = pt.vectorize(eval_logp, "()->()")
-    logp_init = vec_eval_logp(domain)
+
+    log_P_ = pt.log(P_)
+    log_alpha_init = vec_eval_logp(domain) + log_P_
 
     # Construct logp in two steps
     # Step 1: Compute the probability of the data ("emissions") under every possible state (vec_logp_emission)
@@ -543,19 +542,28 @@ def eval_logp(x):
     # Step 2: Compute the transition probabilities
     # This is the "forward algorithm", alpha_t = sum(p(s_t | s_{t-1}) * alpha_{t-1})
     # We do it entirely in logs, though.
+    def step_alpha(logp_emission, log_alpha, log_P):
+        step_log_prob = pt.logsumexp(log_alpha[:, None] + log_P, axis=0)
+        return logp_emission + step_log_prob
+
     log_alpha_seq, _ = scan(
-        step_alpha, non_sequences=[pt.log(P_)], outputs_info=[logp_init], n_steps=n_steps_
+        step_alpha,
+        non_sequences=[log_P_],
+        outputs_info=[log_alpha_init],
+        sequences=pt.moveaxis(vec_logp_emission, -1, 0),
     )
 
     # Scan works over the T dimension, so output is (T, k). We need to swap to (k, T)
-    log_alpha_seq = pt.moveaxis(pt.concatenate([logp_init[None], log_alpha_seq], axis=0), -1, 0)
+    log_alpha_seq = pt.moveaxis(
+        pt.concatenate([log_alpha_init, log_alpha_seq[..., -1]], axis=0), -1, 0
+    )
 
     # Final logp is the logsumexp over states of the forward probabilities, which already accumulate the
     # emission and transition probabilities; if there are multiple emissions that depend on the same markov
     # chain, this gives the joint probability of seeing everything together.
-    joint_log_obs_given_states = pt.logsumexp(pt.add(*vec_logp_emission) + log_alpha_seq, axis=0)
+    joint_log_obs_given_states = pt.logsumexp(log_alpha_seq, axis=0)
 
     # If there are multiple emission streams, we have to add dummy logps for the remaining value variables. The first
     # return is the joint probability of everything together, but PyMC still expects one logp for each one.
-    dummy_logps = (pt.constant(np.zeros((4,))),) * (len(values) - 1)
+    dummy_logps = (pt.constant(np.zeros(shape=())),) * (len(values) - 1)
     return joint_log_obs_given_states, *dummy_logps
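
To make the change easier to follow, here is a minimal, self-contained numpy sketch of the log-space forward algorithm that the scan-based code above implements. This is an illustration only: the names and toy shapes (P, p_init, logp_emission, k, T) are assumptions for the demo rather than objects from the PR, and scipy's logsumexp stands in for pt.logsumexp. alpha_t[s] accumulates log p(y_1, ..., y_t, s_t = s), so the step in the loop below is exactly what step_alpha computes at each scan iteration.

# Illustrative sketch only: names and shapes are demo assumptions, not code from the PR.
import numpy as np
from itertools import product
from scipy.special import logsumexp

rng = np.random.default_rng(0)
k, T = 3, 8  # number of hidden states, number of time steps (toy sizes)

# Row-stochastic transition matrix and initial state distribution
P = rng.dirichlet(np.ones(k), size=k)   # P[i, j] = p(s_t = j | s_{t-1} = i)
p_init = rng.dirichlet(np.ones(k))      # p(s_0)

# logp_emission[t, s] = log p(y_t | s_t = s); random placeholders for the demo
logp_emission = np.log(rng.dirichlet(np.ones(k), size=T))

log_P = np.log(P)
log_alpha = np.log(p_init) + logp_emission[0]  # alpha_0 folds in the first emission

for t in range(1, T):
    # In probability space: alpha_t[s] = p(y_t | s) * sum_{s'} p(s | s') * alpha_{t-1}[s']
    log_alpha = logp_emission[t] + logsumexp(log_alpha[:, None] + log_P, axis=0)

log_marginal = logsumexp(log_alpha)  # log p(y_1, ..., y_T), with the chain summed out

# Brute-force check: enumerate all k**T hidden paths (feasible only at toy sizes)
total = 0.0
for path in product(range(k), repeat=T):
    lp = np.log(p_init[path[0]]) + logp_emission[0, path[0]]
    for t in range(1, T):
        lp += log_P[path[t - 1], path[t]] + logp_emission[t, path[t]]
    total += np.exp(lp)

assert np.isclose(log_marginal, np.log(total))

The brute-force check sums over all k**T hidden paths, while the recursion reaches the same marginal in O(T * k**2); doing it in logs with logsumexp avoids the underflow that plain products of small probabilities would hit for long chains.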