Change SMC metropolis kernel to independent metropolis kernel #4115


Merged 6 commits on Sep 21, 2020
Changes from 3 commits
10 changes: 5 additions & 5 deletions pymc3/smc/sample_smc.py
@@ -35,7 +35,7 @@ def sample_smc(
n_steps=25,
start=None,
tune_steps=True,
-    p_acc_rate=0.99,
+    p_acc_rate=0.85,
Member:

Just realized my simulation still uses .99 and it seems better than .85 - how confident are you about this, @aloctavodia?

Member Author:

Not sure at all. This is something to test and most likely change. Eyeballing it, 0.9 could be a good compromise. But for you chains/steps are cheaper, right? So maybe you can keep 0.99.
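For context, a quick sketch of how `p_acc_rate` feeds into the `n_steps` tuning formula in `tune()` (see smc.py below); the acceptance rate and cap here are assumed values, purely for illustration:

```python
# Hypothetical numbers: acc_rate and max_steps are made up for illustration.
import numpy as np

max_steps = 25   # upper bound on n_steps (the n_steps argument)
acc_rate = 0.15  # pretend per-stage acceptance rate

for p_acc_rate in (0.85, 0.99):
    n_steps = min(max_steps, max(2, int(np.log(1 - p_acc_rate) / np.log(1 - acc_rate))))
    print(p_acc_rate, "->", n_steps)
# 0.85 -> 11, 0.99 -> 25 (hits the cap): a larger p_acc_rate buys more mutation steps per stage.
```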

threshold=0.5,
save_sim_data=False,
model=None,
@@ -61,7 +61,7 @@ def sample_smc(
acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``.
start: dict, or array of dict
Starting point in parameter space. It should be a list of dict with length `chains`.
-        When None (default) the starting point is sampled from the prior distribution.
+        When None (default) the starting point is sampled from the prior distribution.
tune_steps: bool
Whether to compute the number of steps automatically or not. Defaults to True
p_acc_rate: float
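As a minimal usage sketch of these arguments (the model and data below are hypothetical, not part of this PR):

```python
import pymc3 as pm

with pm.Model():
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("obs", mu, 1.0, observed=[0.1, -0.3, 0.7])
    # kernel, tune_steps, and p_acc_rate as documented above
    trace = pm.sample_smc(draws=1000, kernel="metropolis", tune_steps=True, p_acc_rate=0.85)
```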
@@ -147,8 +147,8 @@ def sample_smc(
cores = 1

_log.info(
f"Multiprocess sampling ({chains} chain{'s' if chains > 1 else ''} "
f"in {cores} job{'s' if cores > 1 else ''})"
f"Sampling {chains} chain{'s' if chains > 1 else ''} "
f"in {cores} job{'s' if cores > 1 else ''}"
)

if random_seed == -1:
@@ -200,11 +200,11 @@ def sample_smc(
trace = MultiTrace(traces)
trace.report._n_draws = draws
trace.report._n_tune = 0
-    trace.report._t_sampling = time.time() - t1
trace.report.log_marginal_likelihood = np.array(log_marginal_likelihoods)
trace.report.betas = betas
trace.report.accept_ratios = accept_ratios
trace.report.nsteps = nsteps
+    trace.report._t_sampling = time.time() - t1

if save_sim_data:
return trace, {modelcontext(model).observed_RVs[0].name: np.array(sim_data)}
49 changes: 23 additions & 26 deletions pymc3/smc/smc.py
@@ -16,6 +16,7 @@

import numpy as np
from scipy.special import logsumexp
+from scipy.stats import multivariate_normal
from theano import function as theano_function
import theano.tensor as tt

@@ -33,7 +34,7 @@ def __init__(
n_steps=25,
start=None,
tune_steps=True,
-        p_acc_rate=0.99,
+        p_acc_rate=0.85,
threshold=0.5,
save_sim_data=False,
model=None,
@@ -42,7 +43,7 @@ def __init__(
):

self.draws = draws
-        self.kernel = kernel
+        self.kernel = kernel.lower()
self.n_steps = n_steps
self.start = start
self.tune_steps = tune_steps
@@ -64,8 +65,6 @@ def __init__(
self.acc_rate = 1
self.acc_per_chain = np.ones(self.draws)
Member:

remove

self.variables = inputvars(self.model.vars)
self.dimension = sum(v.dsize for v in self.variables)
self.scalings = np.ones(self.draws) * 2.38 / (self.dimension) ** 0.5
self.weights = np.ones(self.draws) / self.draws
self.log_marginal_likelihood = 0
self.sim_data = []
@@ -78,7 +77,9 @@ def initialize_population(self):
var_info = OrderedDict()
if self.start is None:
init_rnd = sample_prior_predictive(
-                self.draws, var_names=[v.name for v in self.model.unobserved_RVs], model=self.model,
+                self.draws,
+                var_names=[v.name for v in self.model.unobserved_RVs],
+                model=self.model,
)
else:
init_rnd = self.start
@@ -102,7 +103,7 @@ def setup_kernel(self):
"""
shared = make_shared_replacements(self.variables, self.model)

-        if self.kernel.lower() == "abc":
+        if self.kernel == "abc":
factors = [var.logpt for var in self.model.free_RVs]
factors += [tt.sum(factor) for factor in self.model.potentials]
self.prior_logp_func = logp_forw([tt.sum(factors)], self.variables, shared)
@@ -122,7 +123,7 @@ def setup_kernel(self):
self.draws,
self.save_sim_data,
)
-        elif self.kernel.lower() == "metropolis":
+        elif self.kernel == "metropolis":
self.prior_logp_func = logp_forw([self.model.varlogpt], self.variables, shared)
self.likelihood_logp_func = logp_forw([self.model.datalogpt], self.variables, shared)

@@ -136,7 +137,7 @@ def initialize_logp(self):
self.prior_logp = np.array(priors).squeeze()
self.likelihood_logp = np.array(likelihoods).squeeze()

-        if self.save_sim_data:
+        if self.kernel == "abc" and self.save_sim_data:
self.sim_data = self.likelihood_logp_func.get_data()

def update_weights_beta(self):
@@ -181,7 +182,6 @@ def resample(self):
self.likelihood_logp = self.likelihood_logp[resampling_indexes]
self.posterior_logp = self.prior_logp + self.likelihood_logp * self.beta
self.acc_per_chain = self.acc_per_chain[resampling_indexes]
Member:

remove.

self.scalings = self.scalings[resampling_indexes]
if self.save_sim_data:
self.sim_data = self.sim_data[resampling_indexes]

@@ -198,47 +198,43 @@ def update_proposal(self):

def tune(self):
"""
-        Tune scaling and n_steps based on the acceptance rate.
+        Tune n_steps based on the acceptance rate.
"""
-        ave_scaling = np.exp(np.log(self.scalings.mean()) + (self.acc_per_chain.mean() - 0.234))
-        self.scalings = 0.5 * (
-            ave_scaling + np.exp(np.log(self.scalings) + (self.acc_per_chain - 0.234))
-        )

if self.tune_steps:
acc_rate = max(1.0 / self.proposed, self.acc_rate)
self.n_steps = min(
-                self.max_steps, max(2, int(np.log(1 - self.p_acc_rate) / np.log(1 - acc_rate))),
+                self.max_steps,
+                max(2, int(np.log(1 - self.p_acc_rate) / np.log(1 - acc_rate))),
)

self.proposed = self.draws * self.n_steps

def mutate(self):
ac_ = np.empty((self.n_steps, self.draws))

-        proposals = (
-            np.random.multivariate_normal(
-                np.zeros(self.dimension), self.cov, size=(self.n_steps, self.draws)
-            )
-            * self.scalings[:, None]
-        )
log_R = np.log(np.random.rand(self.n_steps, self.draws))

+        dist = multivariate_normal(self.posterior.mean(axis=0), self.cov)

for n_step in range(self.n_steps):
-            proposal = floatX(self.posterior + proposals[n_step])
+            proposal = floatX(dist.rvs(size=self.draws))
+            proposal = proposal.reshape(len(proposal), -1)
+            forward = dist.logpdf(proposal)
+            backward = multivariate_normal(proposal.mean(axis=0), self.cov).logpdf(self.posterior)
Member:

I want to understand a bit better what the changes are doing here, so my summary is that:

1. It removes the tuning related to self.scalings, which originally tuned a scaling of the proposal so that each chain approaches a target acceptance rate of 0.234.

2. The new transition kernel uses global information from the current posterior, mu = self.posterior.mean(axis=0), to build a MvNormal, and accounts for the transition through the Metropolis-Hastings acceptance ratio A(x', x) = min(1, [P(x') / P(x)] * [g(x | x') / g(x' | x)]) (notation from Wikipedia). For that, the "backward" density g(x | x') uses a new MvNormal with mu = mean(x_prime). It is independent MH as we have g(x' | x) = g(x').

Member Author:

Right!

Member:

Cool - could you add some of these as inline comments?

Member:

Any reason why dist = multivariate_normal(self.posterior.mean(axis=0), self.cov) is not in the for n_step ... loop?

Member Author:

Intuitively, I thought about fixing the proposal before doing a productive run. Also, this will make it easier to use more complex proposal distributions in the future, as you compute the mean(s) and covariance(s) once per stage.
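To make the exchange above concrete, here is a condensed, self-contained sketch of one stage of this independent-MH mutation. The names (`imh_stage`, `logp`) are simplified for illustration and are not the PR's actual API:

```python
import numpy as np
from scipy.stats import multivariate_normal

def imh_stage(posterior, posterior_logp, logp, cov, n_steps, rng=np.random):
    # The proposal is frozen once per stage, centered on the current population mean.
    dist = multivariate_normal(posterior.mean(axis=0), cov)
    for _ in range(n_steps):
        proposal = dist.rvs(size=len(posterior)).reshape(len(posterior), -1)
        forward = dist.logpdf(proposal)  # log g(x'), independent of the current x
        # "Backward" density: a MvNormal centered on the proposal mean, evaluated at x.
        backward = multivariate_normal(proposal.mean(axis=0), cov).logpdf(posterior)
        proposal_logp = np.array([logp(p) for p in proposal])
        log_R = np.log(rng.rand(len(posterior)))
        accepted = log_R < (proposal_logp + backward) - (posterior_logp + forward)
        posterior[accepted] = proposal[accepted]
        posterior_logp[accepted] = proposal_logp[accepted]
    return posterior, posterior_logp
```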

ll = np.array([self.likelihood_logp_func(prop) for prop in proposal])
pl = np.array([self.prior_logp_func(prop) for prop in proposal])
proposal_logp = pl + ll * self.beta
-            accepted = log_R[n_step] < (proposal_logp - self.posterior_logp)
+            accepted = log_R[n_step] < (
+                (proposal_logp + backward) - (self.posterior_logp + forward)
+            )
ac_[n_step] = accepted
self.posterior[accepted] = proposal[accepted]
self.posterior_logp[accepted] = proposal_logp[accepted]
self.prior_logp[accepted] = pl[accepted]
self.likelihood_logp[accepted] = ll[accepted]
-            if self.save_sim_data:
+            if self.kernel == "abc" and self.save_sim_data:
self.sim_data[accepted] = self.likelihood_logp_func.get_data()[accepted]

self.acc_per_chain = np.mean(ac_, axis=0)
self.acc_rate = np.mean(ac_)

def posterior_to_trace(self):
@@ -341,6 +337,7 @@ def __init__(
self.observations = self.sum_stat(observations)

def posterior_to_function(self, posterior):
+        model = self.model
Contributor:

Sorry, why is this added here if it's unused?

Member Author:

Good catch! This is a mistake; it is part of other ideas I was trying that did not make it into the PR.

Contributor:

@aloctavodia while we're here, just wanted to say that I'm studying your book and that it's really good!

Member Author:

Glad you like it, @MarcoGorelli. Hopefully next year we will publish a new one together with Junpeng, Ravin, and Ari.

Contributor:

Awesome @aloctavodia - feel free to reach out to me if you want any help proofreading.

var_info = self.var_info

varvalues = []