|
9 | 9 |
|
10 | 10 | from .arraystep import metrop_select
|
11 | 11 | from .metropolis import MultivariateNormalProposal
|
| 12 | +from .smc_utils import _initial_population, _calc_covariance, _tune, _posterior_to_trace |
12 | 13 | from ..theanof import floatX, make_shared_replacements, join_nonshared_inputs, inputvars
|
13 | 14 | from ..model import modelcontext
|
14 |
| -from ..backends.ndarray import NDArray |
15 |
| -from ..backends.base import MultiTrace |
16 | 15 |
|
17 | 16 |
|
18 | 17 | __all__ = ["SMC", "sample_smc"]
|
19 | 18 |
|
20 | 19 |
|
21 | 20 | class SMC:
|
22 |
| - """ |
| 21 | + R""" |
23 | 22 | Sequential Monte Carlo step
|
24 | 23 |
|
25 | 24 | Parameters
|
@@ -50,6 +49,34 @@ class SMC:
|
50 | 49 | model : :class:`pymc3.Model`
|
51 | 50 | Optional model for sampling step. Defaults to None (taken from context).
|
52 | 51 |
|
| 52 | + Notes |
| 53 | + ----- |
| 54 | + SMC works by moving from successive stages. At each stage the inverse temperature \beta is |
| 55 | + increased a little bit (starting from 0 up to 1). When \beta = 0 we have the prior distribution |
| 56 | + and when \beta = 1 we have the posterior distribution. So in more general terms we are always |
| 57 | + computing samples from a tempered posterior that we can write as: |
| 58 | +
|
| 59 | + p(\theta \mid y)_{\beta} = p(y \mid \theta)^{\beta} p(\theta) |
| 60 | +
|
| 61 | + A summary of the algorithm is: |
| 62 | +
|
| 63 | + 1. Initialize \beta at zero and stage at zero. |
| 64 | + 2. Generate N samples S_{\beta} from the prior (because when \beta = 0 the tempered posterior |
| 65 | + is the prior). |
| 66 | + 3. Increase \beta in order to make the effective sample size equal to some predefined value |
| 67 | + (we use N*t, where t is 0.5 by default). |
| 68 | + 4. Compute a set of N importance weights W. The weights are computed as the ratio of the |
| 69 | + likelihoods of a sample at stage i+1 and stage i. |
| 70 | + 5. Obtain S_{w} by re-sampling according to W. |
| 71 | + 6. Use W to compute the covariance for the proposal distribution. |
| 72 | + 7. For stages other than 0 use the acceptance rate from the previous stage to estimate the |
| 73 | + scaling of the proposal distribution and n_steps. |
| 74 | + 8. Run N Metropolis chains (each one of length n_steps), starting each one from a different |
| 75 | + sample in S_{w}. |
| 76 | + 9. Repeat from step 3 until \beta \ge 1. |
| 77 | + 10. The final result is a collection of N samples from the posterior. |
| 78 | +
|
| 79 | +
|
53 | 80 | References
|
54 | 81 | ----------
|
55 | 82 | .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013),
|
@@ -144,13 +171,7 @@ def sample_smc(draws=5000, step=None, cores=None, progressbar=False, model=None,
|
144 | 171 | # compute scaling (optional) and number of Markov chains steps (optional), based on the
|
145 | 172 | # acceptance rate of the previous stage
|
146 | 173 | if (step.tune_scaling or step.tune_steps) and stage > 0:
|
147 |
| - if step.tune_scaling: |
148 |
| - step.scaling = _tune(acc_rate) |
149 |
| - if step.tune_steps: |
150 |
| - acc_rate = max(1.0 / proposed, acc_rate) |
151 |
| - step.n_steps = min( |
152 |
| - step.max_steps, 1 + int(np.log(step.p_acc_rate) / np.log(1 - acc_rate)) |
153 |
| - ) |
| 174 | + _tune(acc_rate, proposed, step) |
154 | 175 |
|
155 | 176 | pm._log.info("Stage: {:d} Beta: {:.3f} Steps: {:d}".format(stage, beta, step.n_steps))
|
156 | 177 | # Apply Metropolis kernel (mutation)
|
@@ -236,25 +257,6 @@ def _metrop_kernel(
|
236 | 257 | return q_old, accepted
|
237 | 258 |
|
238 | 259 |
|
239 |
def _initial_population(draws, model, variables):
    """
    Create an initial population from the prior
    """
    start = model.test_point
    # Remember (shape, size) per variable so flat samples can be
    # reshaped back later (e.g. when building a trace).
    var_info = {v.name: (start[v.name].shape, start[v.name].size) for v in variables}

    prior_samples = pm.sample_prior_predictive(draws, model=model)
    # One flattened array per draw, taken from the prior-predictive samples.
    population = [
        model.dict_to_array(
            pm.Point({v.name: prior_samples[v.name][idx] for v in variables}, model=model)
        )
        for idx in range(draws)
    ]

    return np.array(floatX(population)), var_info
256 |
| - |
257 |
| - |
258 | 260 | def _calc_beta(beta, likelihoods, threshold=0.5):
|
259 | 261 | """
|
260 | 262 | Calculate next inverse temperature (beta) and importance weights based on current beta
|
@@ -305,56 +307,6 @@ def _calc_beta(beta, likelihoods, threshold=0.5):
|
305 | 307 | return new_beta, old_beta, weights, np.mean(sj)
|
306 | 308 |
|
307 | 309 |
|
308 |
| -def _calc_covariance(posterior, weights): |
309 |
| - """ |
310 |
| - Calculate trace covariance matrix based on importance weights. |
311 |
| - """ |
312 |
| - cov = np.cov(posterior, aweights=weights.ravel(), bias=False, rowvar=0) |
313 |
| - if np.isnan(cov).any() or np.isinf(cov).any(): |
314 |
| - raise ValueError('Sample covariances not valid! Likely "draws" is too small!') |
315 |
| - return np.atleast_2d(cov) |
316 |
| - |
317 |
| - |
318 |
| -def _tune(acc_rate): |
319 |
| - """ |
320 |
| - Tune adaptively based on the acceptance rate. |
321 |
| -
|
322 |
| - Parameters |
323 |
| - ---------- |
324 |
| - acc_rate: float |
325 |
| - Acceptance rate of the Metropolis sampling |
326 |
| -
|
327 |
| - Returns |
328 |
| - ------- |
329 |
| - scaling: float |
330 |
| - """ |
331 |
| - # a and b after Muto & Beck 2008. |
332 |
| - a = 1.0 / 9 |
333 |
| - b = 8.0 / 9 |
334 |
| - return (a + b * acc_rate) ** 2 |
335 |
| - |
336 |
| - |
337 |
def _posterior_to_trace(posterior, variables, model, var_info):
    """
    Save results into a PyMC3 trace
    """
    n_samples = len(posterior)
    varnames = [v.name for v in variables]

    with model:
        strace = NDArray(model)
        strace.setup(n_samples, 0)
        for flat_sample in posterior:
            point = {}
            offset = 0
            for name in varnames:
                shape, size = var_info[name]
                # Slice this variable out of the flat sample and restore its shape.
                point[name] = flat_sample[offset : offset + size].reshape(shape)
                offset += size
            strace.record(point)
    return MultiTrace([strace])
356 |
| - |
357 |
| - |
358 | 310 | def logp_forw(out_vars, vars, shared):
|
359 | 311 | """Compile Theano function of the model and the input and output variables.
|
360 | 312 |
|
|
0 commit comments