
Commit fa49b43

Fix bug in fit_MAP when shared variables are used in graph
1 parent 7fb87b4 · commit fa49b43

File tree: 2 files changed (+31, −1 lines)

pymc_extras/inference/find_map.py
tests/test_find_map.py

pymc_extras/inference/find_map.py

Lines changed: 8 additions & 1 deletion
@@ -15,6 +15,7 @@
 from pymc.initial_point import make_initial_point_fn
 from pymc.model.transform.optimization import freeze_dims_and_data
 from pymc.pytensorf import join_nonshared_inputs
+from pymc.sampling.jax import _replace_shared_variables
 from pymc.util import get_default_varnames
 from pytensor.compile import Function
 from pytensor.compile.mode import Mode
@@ -146,7 +147,7 @@ def _compile_grad_and_hess_to_jax(
     orig_loss_fn = f_loss.vm.jit_fn
 
     @jax.jit
-    def loss_fn_jax_grad(x, *shared):
+    def loss_fn_jax_grad(x):
         return jax.value_and_grad(lambda x: orig_loss_fn(x)[0])(x)
 
     f_loss_and_grad = loss_fn_jax_grad
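
For context on the wrapper being edited here: find_MAP's JAX path wraps the compiled loss with jax.value_and_grad so that scipy receives the objective value and its gradient from a single call. Below is a minimal, self-contained sketch of that pattern, not the library's code; toy_loss and scipy_fun are hypothetical stand-ins for the compiled model loss and the callable handed to the optimizer.

import jax
import jax.numpy as jnp
import numpy as np
from scipy.optimize import minimize


def toy_loss(x):
    # Hypothetical stand-in for the compiled model loss (orig_loss_fn); minimum at x = 3.
    return jnp.sum((x - 3.0) ** 2)


# Wrap the loss so one call yields both the value and the gradient, then JIT it.
toy_loss_and_grad = jax.jit(jax.value_and_grad(toy_loss))


def scipy_fun(x):
    value, grad = toy_loss_and_grad(x)
    # Convert JAX arrays to plain Python/NumPy objects for scipy.
    return float(value), np.asarray(grad)


# jac=True tells scipy that scipy_fun returns (value, gradient) in one call.
result = minimize(scipy_fun, x0=np.zeros(2), jac=True, method="L-BFGS-B")
print(result.x)  # approximately [3., 3.]
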
@@ -301,6 +302,12 @@ def scipy_optimize_funcs_from_loss(
         point=initial_point_dict, outputs=[loss], inputs=inputs
     )
 
+    # If we use pytensor gradients, we will use the pytensor function wrapper that handles shared variables. When
+    # computing jax gradients, we discard the function wrapper, so we can't handle shared variables --> rewrite them
+    # away.
+    if use_jax_gradients:
+        [loss] = _replace_shared_variables([loss])
+
     compute_grad = use_grad and not use_jax_gradients
     compute_hess = use_hess and not use_jax_gradients
     compute_hessp = use_hessp and not use_jax_gradients
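
As the new comment explains, pytensor's function wrapper feeds shared variables automatically, but the raw jitted JAX callable used for gradients does not, so the loss graph is rewritten to have no shared inputs before it reaches JAX. A minimal sketch of what that rewrite does, assuming a toy graph (the data/mu/loss names are illustrative, not from the library):

import numpy as np
import pytensor
import pytensor.tensor as pt
from pymc.sampling.jax import _replace_shared_variables
from pytensor.compile import SharedVariable
from pytensor.graph.basic import graph_inputs

# Toy "loss" graph that closes over a shared variable, mimicking how observed
# data stored in a shared container ends up in the model's logp graph.
data = pytensor.shared(np.array([1.0, 2.0, 3.0]), name="data")
mu = pt.scalar("mu")
loss = ((data - mu) ** 2).sum()

# Before the rewrite, the shared variable is an implicit input of the graph.
assert any(isinstance(v, SharedVariable) for v in graph_inputs([loss]))

# After the rewrite, the shared value is baked in as a constant, so the graph
# depends only on the explicit input `mu` and no longer needs the pytensor
# function wrapper that would normally supply shared variables.
[loss_no_shared] = _replace_shared_variables([loss])
assert not any(isinstance(v, SharedVariable) for v in graph_inputs([loss_no_shared]))

f = pytensor.function([mu], loss_no_shared)
print(f(0.0))  # sum((data - 0)^2) = 14.0
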

tests/test_find_map.py

Lines changed: 23 additions & 0 deletions
@@ -1,5 +1,6 @@
 import numpy as np
 import pymc as pm
+import pytensor
 import pytensor.tensor as pt
 import pytest

@@ -101,3 +102,25 @@ def test_JAX_map(method, use_grad, use_hess, use_hessp, gradient_backend: GradientBackend):
 
     assert np.isclose(mu_hat, 3, atol=0.5)
     assert np.isclose(np.exp(log_sigma_hat), 1.5, atol=0.5)
+
+
+def test_JAX_map_shared_variables():
+    with pm.Model() as m:
+        data = pytensor.shared(np.random.normal(loc=3, scale=1.5, size=100), name="shared_data")
+        mu = pm.Normal("mu")
+        sigma = pm.Exponential("sigma", 1)
+        y_hat = pm.Normal("y_hat", mu=mu, sigma=sigma, observed=data)
+
+        optimized_point = find_MAP(
+            method="L-BFGS-B",
+            use_grad=True,
+            use_hess=False,
+            use_hessp=False,
+            progressbar=False,
+            gradient_backend="jax",
+            compile_kwargs={"mode": "JAX"},
+        )
+    mu_hat, log_sigma_hat = optimized_point["mu"], optimized_point["sigma_log__"]
+
+    assert np.isclose(mu_hat, 3, atol=0.5)
+    assert np.isclose(np.exp(log_sigma_hat), 1.5, atol=0.5)
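
If useful, the new test can presumably be run on its own from a pymc-extras checkout with the standard pytest node-id syntax:

pytest tests/test_find_map.py::test_JAX_map_shared_variables
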
