
Commit 1cef2e7

Address comments by @theorashid on PR pymc-devs#385
1 parent 9d47188 commit 1cef2e7

4 files changed: +50 -39 lines


pymc_extras/inference/find_map.py

+17-13
@@ -30,13 +30,17 @@
 def set_optimizer_function_defaults(method, use_grad, use_hess, use_hessp):
     method_info = MINIMIZE_MODE_KWARGS[method].copy()
 
+    if use_hess and use_hessp:
+        _log.warning(
+            'Both "use_hess" and "use_hessp" are set to True. scipy.optimize.minimize never uses both at the '
+            'same time. Setting "use_hess" to False.'
+        )
+        use_hess = False
+
     use_grad = use_grad if use_grad is not None else method_info["uses_grad"]
     use_hess = use_hess if use_hess is not None else method_info["uses_hess"]
     use_hessp = use_hessp if use_hessp is not None else method_info["uses_hessp"]
 
-    if use_hess and use_hessp:
-        use_hess = False
-
     return use_grad, use_hess, use_hessp
 

@@ -97,7 +101,7 @@ def _create_transformed_draws(H_inv, slices, out_shapes, posterior_draws, model,
     return f_untransform(posterior_draws)
 
 
-def _compile_jax_gradients(
+def _compile_grad_and_hess_to_jax(
     f_loss: Function, use_hess: bool, use_hessp: bool
 ) -> tuple[Callable | None, Callable | None]:
     """
@@ -152,7 +156,7 @@ def f_hess_jax(x):
     return f_loss_and_grad, f_hess, f_hessp
 
 
-def _compile_functions(
+def _compile_functions_for_scipy_optimize(
     loss: TensorVariable,
     inputs: list[TensorVariable],
     compute_grad: bool,
@@ -177,7 +181,7 @@ def _compile_functions(
     compute_hessp: bool
         Whether to compile a function that computes the Hessian-vector product of the loss function.
     compile_kwargs: dict, optional
-        Additional keyword arguments to pass to the ``pm.compile_pymc`` function.
+        Additional keyword arguments to pass to the ``pm.compile`` function.
 
     Returns
     -------
@@ -193,19 +197,19 @@
     if compute_grad:
         grads = pytensor.gradient.grad(loss, inputs)
         grad = pt.concatenate([grad.ravel() for grad in grads])
-        f_loss_and_grad = pm.compile_pymc(inputs, [loss, grad], **compile_kwargs)
+        f_loss_and_grad = pm.compile(inputs, [loss, grad], **compile_kwargs)
     else:
-        f_loss = pm.compile_pymc(inputs, loss, **compile_kwargs)
+        f_loss = pm.compile(inputs, loss, **compile_kwargs)
         return [f_loss]
 
     if compute_hess:
         hess = pytensor.gradient.jacobian(grad, inputs)[0]
-        f_hess = pm.compile_pymc(inputs, hess, **compile_kwargs)
+        f_hess = pm.compile(inputs, hess, **compile_kwargs)
 
     if compute_hessp:
         p = pt.tensor("p", shape=inputs[0].type.shape)
         hessp = pytensor.gradient.hessian_vector_product(loss, inputs, p)
-        f_hessp = pm.compile_pymc([*inputs, p], hessp[0], **compile_kwargs)
+        f_hessp = pm.compile([*inputs, p], hessp[0], **compile_kwargs)
 
     return [f_loss_and_grad, f_hess, f_hessp]
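
For context, ``pm.compile`` (used here in place of ``pm.compile_pymc``) takes a list of symbolic inputs and outputs and returns a compiled PyTensor function. A minimal sketch of the loss-and-gradient pattern from this hunk, using a toy quadratic loss rather than a model's log-posterior:

import pymc as pm
import pytensor
import pytensor.tensor as pt

x = pt.vector("x")
loss = (x**2).sum()                     # toy loss standing in for the negative log-posterior
grad = pytensor.gradient.grad(loss, x)  # symbolic gradient, as in the compute_grad branch

f_loss_and_grad = pm.compile([x], [loss, grad])
value, gradient = f_loss_and_grad([1.0, 2.0, 3.0])  # -> 14.0 and [2. 4. 6.]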

@@ -240,7 +244,7 @@ def scipy_optimize_funcs_from_loss(
     gradient_backend: str, default "pytensor"
         Which backend to use to compute gradients. Must be one of "jax" or "pytensor"
     compile_kwargs:
-        Additional keyword arguments to pass to the ``pm.compile_pymc`` function.
+        Additional keyword arguments to pass to the ``pm.compile`` function.
 
     Returns
     -------
@@ -285,7 +289,7 @@ def scipy_optimize_funcs_from_loss(
     compute_hess = use_hess and not use_jax_gradients
     compute_hessp = use_hessp and not use_jax_gradients
 
-    funcs = _compile_functions(
+    funcs = _compile_functions_for_scipy_optimize(
         loss=loss,
         inputs=[flat_input],
         compute_grad=compute_grad,
@@ -301,7 +305,7 @@
 
     if use_jax_gradients:
         # f_loss here is f_loss_and_grad; the name is unchanged to simplify the return values
-        f_loss, f_hess, f_hessp = _compile_jax_gradients(f_loss, use_hess, use_hessp)
+        f_loss, f_hess, f_hessp = _compile_grad_and_hess_to_jax(f_loss, use_hess, use_hessp)
 
     return f_loss, f_hess, f_hessp
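
Taken together, the changes to this file mean that explicitly requesting both ``use_hess=True`` and ``use_hessp=True`` now logs a warning and continues with the Hessian-vector product path, rather than silently dropping ``use_hess``. A minimal sketch with a toy model (keyword names taken from the tests below; assuming ``find_MAP`` picks the model up from the enclosing model context, as the tests do):

import numpy as np
import pymc as pm
from pymc_extras.inference.find_map import find_MAP

with pm.Model():
    mu = pm.Normal("mu")
    pm.Normal("y", mu=mu, sigma=1.0, observed=np.random.normal(loc=0.5, size=10))

    # Both flags requested: a warning is logged, use_hess is set to False,
    # and only the Hessian-vector product is compiled and handed to scipy.
    result = find_MAP(
        method="trust-ncg",
        use_grad=True,
        use_hess=True,
        use_hessp=True,
        progressbar=False,
    )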

pymc_extras/inference/laplace.py

+12-10
@@ -231,7 +231,7 @@ def add_data_to_inferencedata(
     return idata
 
 
-def fit_mvn_to_MAP(
+def fit_mvn_at_MAP(
     optimized_point: dict[str, np.ndarray],
     model: pm.Model | None = None,
     on_bad_cov: Literal["warn", "error", "ignore"] = "ignore",
@@ -344,8 +344,10 @@ def sample_laplace_posterior(
 
     Parameters
     ----------
-    mu
-    H_inv
+    mu: RaveledVars
+        The MAP estimate of the model parameters.
+    H_inv: np.ndarray
+        The inverse Hessian matrix of the log-posterior evaluated at the MAP estimate.
     model : Model
         A PyMC model
     chains : int
@@ -384,9 +386,7 @@ def sample_laplace_posterior(
             constrained_rvs, replace={unconstrained_vector: batched_values}
         )
 
-        f_constrain = pm.compile_pymc(
-            inputs=[batched_values], outputs=batched_rvs, **compile_kwargs
-        )
+        f_constrain = pm.compile(inputs=[batched_values], outputs=batched_rvs, **compile_kwargs)
         posterior_draws = f_constrain(posterior_draws)
 
     else:
@@ -472,15 +472,17 @@ def fit_laplace(
         and 1).
 
         .. warning::
-            This argumnet should be considered highly experimental. It has not been verified if this method produces
+            This argument should be considered highly experimental. It has not been verified if this method produces
             valid draws from the posterior. **Use at your own risk**.
 
     gradient_backend: str, default "pytensor"
         The backend to use for gradient computations. Must be one of "pytensor" or "jax".
     chains: int, default: 2
-        The number of sampling chains running in parallel.
+        The number of chain dimensions to sample. Note that this is *not* the number of chains to run in parallel,
+        because the Laplace approximation is not an MCMC method. This argument exists to ensure that outputs are
+        compatible with the ArviZ library.
     draws: int, default: 500
-        The number of samples to draw from the approximated posterior.
+        The number of samples to draw from the approximated posterior. Total samples will be chains * draws.
     on_bad_cov : str, one of 'ignore', 'warn', or 'error', default: 'ignore'
         What to do when ``H_inv`` (inverse Hessian) is not positive semi-definite.
         If 'ignore' or 'warn', the closest positive-semi-definite matrix to ``H_inv`` (in L1 norm) will be returned.
@@ -547,7 +549,7 @@ def fit_laplace(
         **optimizer_kwargs,
     )
 
-    mu, H_inv = fit_mvn_to_MAP(
+    mu, H_inv = fit_mvn_at_MAP(
        optimized_point=optimized_point,
        model=model,
        on_bad_cov=on_bad_cov,
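
The reworded ``chains`` and ``draws`` docstrings above describe the shape of the output rather than any parallelism. A sketch with a toy model (assuming, as in the tests, that ``fit_laplace`` picks the model up from the enclosing context and returns an ArviZ ``InferenceData``):

import numpy as np
import pymc as pm
from pymc_extras.inference.laplace import fit_laplace

with pm.Model():
    mu = pm.Normal("mu")
    pm.Normal("y", mu=mu, sigma=1.0, observed=np.random.normal(loc=0.3, size=25))

    idata = fit_laplace(chains=2, draws=500)

# No MCMC chains are run; the fitted normal approximation is sampled
# chains * draws times and laid out as the usual (chain, draw, ...) dimensions.
print(idata.posterior["mu"].shape)  # expected: (2, 500)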

tests/test_find_map.py

+19-14
@@ -54,24 +54,28 @@ def compute_z(x):
 
 
 @pytest.mark.parametrize(
-    "method, use_grad, use_hess",
+    "method, use_grad, use_hess, use_hessp",
     [
-        ("nelder-mead", False, False),
-        ("powell", False, False),
-        ("CG", True, False),
-        ("BFGS", True, False),
-        ("L-BFGS-B", True, False),
-        ("TNC", True, False),
-        ("SLSQP", True, False),
-        ("dogleg", True, True),
-        ("trust-ncg", True, True),
-        ("trust-exact", True, True),
-        ("trust-krylov", True, True),
-        ("trust-constr", True, True),
+        ("nelder-mead", False, False, False),
+        ("powell", False, False, False),
+        ("CG", True, False, False),
+        ("BFGS", True, False, False),
+        ("L-BFGS-B", True, False, False),
+        ("TNC", True, False, False),
+        ("SLSQP", True, False, False),
+        ("dogleg", True, True, False),
+        ("Newton-CG", True, True, False),
+        ("Newton-CG", True, False, True),
+        ("trust-ncg", True, True, False),
+        ("trust-ncg", True, False, True),
+        ("trust-exact", True, True, False),
+        ("trust-krylov", True, True, False),
+        ("trust-krylov", True, False, True),
+        ("trust-constr", True, True, False),
     ],
 )
 @pytest.mark.parametrize("gradient_backend", ["jax", "pytensor"], ids=str)
-def test_JAX_map(method, use_grad, use_hess, gradient_backend: GradientBackend, rng):
+def test_JAX_map(method, use_grad, use_hess, use_hessp, gradient_backend: GradientBackend, rng):
     extra_kwargs = {}
     if method == "dogleg":
         # HACK -- dogleg requires that the hessian of the objective function is PSD, so we have to pick a point
@@ -88,6 +92,7 @@ def test_JAX_map(method, use_grad, use_hess, gradient_backend: GradientBackend,
         **extra_kwargs,
         use_grad=use_grad,
         use_hess=use_hess,
+        use_hessp=use_hessp,
         progressbar=False,
         gradient_backend=gradient_backend,
         compile_kwargs={"mode": "JAX"},
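
The new parametrizations exercise the ``use_hessp`` path for Newton-CG and the trust-region methods. Independently of this repo, the underlying scipy API accepts either a full Hessian via ``hess`` or a Hessian-vector product via ``hessp``; only one of the two is needed for these methods, which is what the warning added in ``find_map.py`` is about. A small standalone sketch:

import numpy as np
from scipy.optimize import minimize

def loss(x):
    return 0.5 * np.sum(x**2)

def grad(x):
    return x

def hessp(x, p):
    return p  # the Hessian of 0.5 * ||x||^2 is the identity, so H @ p == p

res = minimize(loss, x0=np.ones(3), jac=grad, hessp=hessp, method="trust-ncg")
print(res.x)  # converges to the minimum at the origin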

tests/test_laplace.py

+2-2
@@ -22,7 +22,7 @@
 from pymc_extras.inference.find_map import find_MAP
 from pymc_extras.inference.laplace import (
     fit_laplace,
-    fit_mvn_to_MAP,
+    fit_mvn_at_MAP,
     sample_laplace_posterior,
 )

@@ -137,7 +137,7 @@ def test_fit_laplace_coords(rng, transform_samples, mode):
     for value in optimized_point.values():
         assert value.shape == (3,)
 
-    mu, H_inv = fit_mvn_to_MAP(
+    mu, H_inv = fit_mvn_at_MAP(
         optimized_point=optimized_point,
         model=model,
         transform_samples=transform_samples,

0 commit comments
