Skip to content

Commit 879cb49

Browse files
authored
SMC: change scaling computation (#3594)
* change scaling computation * add release notes * remove comma
1 parent 0d5203c commit 879cb49

File tree

3 files changed

+32
-22
lines changed

3 files changed

+32
-22
lines changed

RELEASE-NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
- Fixed a defect in `OrderedLogistic.__init__` that unnecessarily increased the dimensionality of the underlying `p`. Related to issue issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535) but was not the true cause of it.
1818
- SMC: stabilize covariance matrix [3573](https://github.com/pymc-devs/pymc3/pull/3573)
1919
- SMC is no longer a step method of `pm.sample` now it should be called using `pm.sample_smc` [3579](https://github.com/pymc-devs/pymc3/pull/3579)
20+
- SMC: improve computation of the proposal scaling factor [3594](https://github.com/pymc-devs/pymc3/pull/3594)
2021
- Now uses `multiprocessing` rather than `psutil` to count CPUs, which results in reliable core counts on Chromebooks.
2122
- `sample_posterior_predictive` now preallocates the memory required for its output to improve memory usage. Addresses problems raised in this [discourse thread](https://discourse.pymc.io/t/memory-error-with-posterior-predictive-sample/2891/4).
2223
- Fixed a bug in `Categorical.logp`. In the case of multidimensional `p`'s, the indexing was done wrong leading to incorrectly shaped tensors that consumed `O(n**2)` memory instead of `O(n)`. This fixes issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535)

pymc3/smc/smc.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -189,14 +189,17 @@ def sample_smc(
189189
elif kernel.lower() == "metropolis":
190190
likelihood_logp = logp_forw([model.datalogpt], variables, shared)
191191

192+
if parallel and cores > 1:
193+
pool = mp.Pool(processes=cores)
194+
192195
while beta < 1:
193196
if parallel and cores > 1:
194-
pool = mp.Pool(processes=cores)
195197
results = pool.starmap(likelihood_logp, [(sample,) for sample in posterior])
196198
else:
197199
results = [likelihood_logp(sample) for sample in posterior]
198200
likelihoods = np.array(results).squeeze()
199201
beta, old_beta, weights, sj = calc_beta(beta, likelihoods, threshold)
202+
200203
model.marginal_likelihood *= sj
201204
# resample based on plausibility weights (selection)
202205
resampling_indexes = np.random.choice(np.arange(draws), size=draws, p=weights)
@@ -211,22 +214,14 @@ def sample_smc(
211214
# acceptance rate of the previous stage
212215
if (tune_scaling or tune_steps) and stage > 0:
213216
scaling, n_steps = _tune(
214-
acc_rate,
215-
proposed,
216-
tune_scaling,
217-
tune_steps,
218-
scaling,
219-
n_steps,
220-
max_steps,
221-
p_acc_rate,
217+
acc_rate, proposed, tune_scaling, tune_steps, scaling, max_steps, p_acc_rate
222218
)
223219

224-
pm._log.info("Stage: {:d} Beta: {:.3f} Steps: {:d}".format(stage, beta, n_steps))
220+
pm._log.info("Stage: {:3d} Beta: {:.3f} Steps: {:3d}".format(stage, beta, n_steps))
225221
# Apply Metropolis kernel (mutation)
226222
proposed = draws * n_steps
227223
priors = np.array([prior_logp(sample) for sample in posterior]).squeeze()
228224
tempered_logp = priors + likelihoods * beta
229-
deltas = np.squeeze(proposal(n_steps) * scaling)
230225

231226
parameters = (
232227
proposal,
@@ -240,9 +235,7 @@ def sample_smc(
240235
likelihood_logp,
241236
beta,
242237
)
243-
244238
if parallel and cores > 1:
245-
pool = mp.Pool(processes=cores)
246239
results = pool.starmap(
247240
metrop_kernel,
248241
[(posterior[draw], tempered_logp[draw], *parameters) for draw in range(draws)],
@@ -258,6 +251,9 @@ def sample_smc(
258251
acc_rate = sum(acc_list) / proposed
259252
stage += 1
260253

254+
if parallel and cores > 1:
255+
pool.close()
256+
pool.join()
261257
trace = _posterior_to_trace(posterior, variables, model, var_info)
262258

263259
return trace

pymc3/smc/smc_utils.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pymc3 as pm
77
import theano
88
from ..step_methods.arraystep import metrop_select
9+
from ..step_methods.metropolis import tune
910
from ..backends.ndarray import NDArray
1011
from ..backends.base import MultiTrace
1112
from ..theanof import floatX, join_nonshared_inputs
@@ -51,7 +52,7 @@ def _calc_covariance(posterior, weights):
5152
return cov
5253

5354

54-
def _tune(acc_rate, proposed, tune_scaling, tune_steps, scaling, n_steps, max_steps, p_acc_rate):
55+
def _tune(acc_rate, proposed, tune_scaling, tune_steps, scaling, max_steps, p_acc_rate):
5556
"""
5657
Tune scaling and/or n_steps based on the acceptance rate.
5758
@@ -61,17 +62,26 @@ def _tune(acc_rate, proposed, tune_scaling, tune_steps, scaling, n_steps, max_st
6162
Acceptance rate of the previous stage
6263
proposed: int
6364
Total number of proposed steps (draws * n_steps)
64-
step: SMC step method
65+
tune_scaling : bool
66+
Whether to compute the scaling factor automatically or not
67+
tune_steps : bool
68+
Whether to compute the number of steps automatically or not
69+
scaling : float
70+
Scaling factor applied to the proposal distribution
71+
max_steps : int
72+
The maximum number of steps of each Markov Chain.
73+
p_acc_rate : float
74+
The higher the value, the higher the number of steps computed automatically. It should be
75+
between 0 and 1.
6576
"""
6677
if tune_scaling:
67-
# a and b after Muto & Beck 2008.
68-
a = 1 / 9
69-
b = 8 / 9
70-
scaling = (a + b * acc_rate) ** 2
78+
scaling = tune(scaling, acc_rate)
7179

7280
if tune_steps:
7381
acc_rate = max(1.0 / proposed, acc_rate)
7482
n_steps = min(max_steps, max(2, int(np.log(1 - p_acc_rate) / np.log(1 - acc_rate))))
83+
else:
84+
n_steps = max_steps
7585

7686
return scaling, n_steps
7787

@@ -115,6 +125,7 @@ def metrop_kernel(
115125
Metropolis kernel
116126
"""
117127
deltas = np.squeeze(proposal(n_steps) * scaling)
128+
118129
for n_step in range(n_steps):
119130
delta = deltas[n_step]
120131

@@ -141,7 +152,7 @@ def metrop_kernel(
141152
return q_old, accepted
142153

143154

144-
def calc_beta(beta, likelihoods, threshold=0.5):
155+
def calc_beta(beta, likelihoods, threshold=0.5, psis=True):
145156
"""
146157
Calculate next inverse temperature (beta) and importance weights based on current beta
147158
and tempered likelihood.
@@ -185,9 +196,11 @@ def calc_beta(beta, likelihoods, threshold=0.5):
185196
low_beta = new_beta
186197
if new_beta >= 1:
187198
new_beta = 1
199+
weights_un = np.exp((new_beta - old_beta) * (likelihoods - likelihoods.max()))
200+
weights = weights_un / np.sum(weights_un)
201+
188202
sj = np.exp((new_beta - old_beta) * likelihoods)
189-
weights_un = np.exp((new_beta - old_beta) * (likelihoods - likelihoods.max()))
190-
weights = weights_un / np.sum(weights_un)
203+
191204
return new_beta, old_beta, weights, np.mean(sj)
192205

193206

0 commit comments

Comments
 (0)