diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index f17639f055..a8fa34cb1c 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,9 +1,11 @@ # Release Notes -## PyMC3 3.6 +## PyMC3 3.6 (unreleased) - Renamed `sample_ppc()` and `sample_ppc_w()` to `sample_posterior_predictive()` and `sample_posterior_predictive_w()`, respectively. +- Refactor SMC and properly compute marginal likelihood (#3124) + ## PyMC 3.5 (July 21 2018) @@ -440,3 +442,4 @@ Thus, Thomas, Chris and I are pleased to announce that PyMC3 is now in Beta. * maahnman * paul sorenson * zenourn + diff --git a/pymc3/backends/smc_text.py b/pymc3/backends/smc_text.py deleted file mode 100644 index 88f5779de9..0000000000 --- a/pymc3/backends/smc_text.py +++ /dev/null @@ -1,482 +0,0 @@ -"""Text file trace backend modified to work efficiently with SMC - -Store sampling values as CSV files. - -File format ------------ - -Sampling values for each chain are saved in a separate file (under a -directory specified by the `name` argument). The rows correspond to -sampling iterations. The column names consist of variable names and -index labels. For example, the heading - - x,y__0_0,y__0_1,y__1_0,y__1_1,y__2_0,y__2_1 - -represents two variables, x and y, where x is a scalar and y has a -shape of (3, 2). -""" -from glob import glob -import itertools -import multiprocessing -import pickle -import os -import shutil -from six.moves import map, zip - -import pandas as pd -import pymc3 as pm - -from ..model import modelcontext -from ..backends import base, ndarray -from . import tracetab as ttab -from ..blocking import DictToArrayBijection, ArrayOrdering,\ - ListArrayOrdering, ListToArrayBijection -from ..step_methods.arraystep import BlockedStep - - -def paripool(function, work, nprocs=None, chunksize=1): - """Initialise a pool of workers and execute a function in parallel by - forking the process. Does forking once during initialisation. - - Parameters - ---------- - function : callable - python function to be executed in parallel - work : list - of iterables that are to be looped over/ executed in parallel usually - these objects are different for each task. - nprocs : int - number of processors to be used in parallel process - chunksize : int - number of work packages to throw at workers in each instance - """ - if nprocs is None: - nprocs = multiprocessing.cpu_count() - - if nprocs == 1: - for work_item in work: - yield function(work_item) - else: - pool = multiprocessing.Pool(processes=nprocs) - try: - yield pool.map(function, work, chunksize=chunksize) - finally: - pool.terminate() - - -class ArrayStepSharedLLK(BlockedStep): - """Modified ArrayStepShared To handle returned larger point including the likelihood values. - Takes additionally a list of output vars including the likelihoods. 
- - Parameters - ---------- - - vars : list - variables to be sampled - out_vars : list - variables to be stored in the traces - shared : dict - theano variable -> shared variables - blocked : boolen - (default True) - """ - def __init__(self, vars, out_vars, shared, blocked=True): - self.vars = vars - self.ordering = ArrayOrdering(vars) - self.lordering = ListArrayOrdering(out_vars, intype='tensor') - lpoint = [var.tag.test_value for var in out_vars] - self.shared = {var.name: shared for var, shared in shared.items()} - self.blocked = blocked - self.bij = DictToArrayBijection(self.ordering, self.population[0]) - self.lij = ListToArrayBijection(self.lordering, lpoint) - - def __getstate__(self): - return self.__dict__ - - def __setstate__(self, state): - self.__dict__.update(state) - - def step(self, point): - for var, share in self.shared.items(): - share.container.storage[0] = point[var] - - apoint, alist = self.astep(self.bij.map(point)) - return self.bij.rmap(apoint), alist - - -class BaseSMCTrace(object): - """Base SMC trace object - - Parameters - ---------- - - name : str - Name of backend - model : Model - If None, the model is taken from the `with` context. - vars : list of variables - Sampling values will be stored for these variables. If None, - `model.unobserved_RVs` is used. - """ - - def __init__(self, name, model=None, vars=None): - self.name = name - model = modelcontext(model) - self.model = model - - if vars is None: - vars = model.unobserved_RVs - - self.vars = vars - self.varnames = [var.name for var in vars] - - # Get variable shapes. Most backends will need this information. - - self.var_shapes_list = [var.tag.test_value.shape for var in vars] - self.var_dtypes_list = [var.tag.test_value.dtype for var in vars] - - self.var_shapes = dict(zip(self.varnames, self.var_shapes_list)) - self.var_dtypes = dict(zip(self.varnames, self.var_dtypes_list)) - - self.chain = None - - def __getitem__(self, idx): - if isinstance(idx, slice): - return self._slice(idx) - try: - return self.point(int(idx)) - except (ValueError, TypeError): # Passed variable or variable name. - raise ValueError('Can only index with slice or integer') - - def __getstate__(self): - return self.__dict__ - - def __setstate__(self, state): - self.__dict__.update(state) - - -class TextStage(object): - def __init__(self, base_dir): - self.base_dir = base_dir - self.project_dir = os.path.dirname(base_dir) - self.mode = os.path.basename(base_dir) - if not os.path.isdir(base_dir): - os.mkdir(self.base_dir) - - def stage_path(self, stage): - return os.path.join(self.base_dir, 'stage_{}'.format(stage)) - - def stage_number(self, stage_path): - """Inverse function of TextStage.path""" - return int(os.path.basename(stage_path).split('_')[-1]) - - def highest_sampled_stage(self): - """Return stage number of stage that has been sampled before the final stage. - - Returns - ------- - stage number : int - """ - return max(self.stage_number(s) for s in glob(self.stage_path('*'))) - - def atmip_path(self, stage_number): - """Consistent naming for atmip params.""" - return os.path.join(self.stage_path(stage_number), 'atmip.params.pkl') - - def load_atmip_params(self, stage_number, model): - """Load saved parameters from last sampled ATMIP stage. 
- - Parameters - ---------- - stage number : int - of stage number or -1 for last stage - """ - if stage_number == -1: - prev = self.highest_sampled_stage() - else: - prev = stage_number - 1 - pm._log.info('Loading parameters from completed stage {}'.format(prev)) - - with model: - with open(self.atmip_path(prev), 'rb') as buff: - step = pickle.load(buff) - - # update step stage to current stage - step.stage = stage_number - return step - - def dump_atmip_params(self, step): - """Save atmip params to file.""" - with open(self.atmip_path(step.stage), 'wb') as buff: - pickle.dump(step, buff, protocol=pickle.HIGHEST_PROTOCOL) - - def clean_directory(self, stage, chains, rm_flag): - """Optionally remove directory for the stage. Does nothing if rm_flag is False.""" - stage_path = self.stage_path(stage) - if rm_flag: - if os.path.exists(stage_path): - pm._log.info('Removing previous sampling results ... %s' % stage_path) - shutil.rmtree(stage_path) - chains = None - elif not os.path.exists(stage_path): - chains = None - return chains - - def load_multitrace(self, stage_number, model=None): - """Load TextChain database. - - Parameters - ---------- - stage_number : int - Number of stage to load files from (one per chain) - model : Model - If None, the model is taken from the `with` context. - - Returns - ------- - - A :class:`pymc3.backend.base.MultiTrace` instance - """ - dirname = self.stage_path(stage_number) - files = glob(os.path.join(dirname, 'chain_*.csv')) - if len(files) < 1: - pm._log.warn('Stage directory %s contains no traces!' % dirname) - - straces = [] - for f in files: - chain = int(os.path.basename(f).split('_')[-1].split('.')[0]) - strace = TextChain(dirname, model=model) - strace.chain = chain - strace.filename = f - straces.append(strace) - return base.MultiTrace(straces) - - def check_multitrace(self, mtrace, draws, n_chains): - """Check multitrace for incomplete sampling and return indexes from chains - that need to be resampled. - - Parameters - ---------- - mtrace : :class:`pymc3.backend.base.MultiTrace` - Multitrace object containing the sampling traces - draws : int - Number of steps (i.e. chain length for each Markov Chain) - n_chains : int - Number of Markov Chains - - Returns - ------- - list of indexes for chains that need to be resampled - """ - not_sampled_idx = [] - for chain in range(n_chains): - if chain in mtrace.chains: - if len(mtrace._straces[chain]) != draws: - pm._log.warn('Trace number %i incomplete' % chain) - mtrace._straces[chain].corrupted_flag = True - else: - not_sampled_idx.append(chain) - - flag_bool = [mtrace._straces[chain].corrupted_flag for chain in mtrace.chains] - corrupted_idx = [i for i, x in enumerate(flag_bool) if x] - return corrupted_idx + not_sampled_idx - - def recover_existing_results(self, stage_number, draws, chains, step, model=None): - stage_path = self.stage_path(stage_number) - if os.path.exists(stage_path): - # load incomplete stage results - pm._log.info('Reloading existing results ...') - mtrace = self.load_multitrace(stage_number, model=model) - if len(mtrace.chains) > 0: - # continue sampling if traces exist - pm._log.info('Checking for corrupted files ...') - return self.check_multitrace(mtrace, draws=draws, n_chains=chains) - pm._log.info('Init new trace!') - return None - - def create_result_trace(self, stage_number=-1, idxs=(-1, ), step=None, model=None): - """ - Concatenate points from all traces into one single trace, which can be used by - traceplot. Stores result trace in stage_-2. 
- - Parameters - ---------- - stage_number : int - stage of which result traces are loaded - idxs : iterable - of indexes to the point at each chain to extract and concatenate - Returns - ------- - MultiTrace - """ - if step is None: - step = self.load_atmip_params(stage_number, model) - - mtrace = self.load_multitrace(stage_number, model=model) - - summary_dir = self.stage_path(-2) - rtrace = TextChain(summary_dir, model=model) - draws = len(mtrace.chains) * len(idxs) - rtrace.setup(draws, chain=0) - - for idx in idxs: - for chain in mtrace.chains: - point = mtrace.point(idx, chain) - lpoint = step.lij.dmap(point) - rtrace.record(lpoint) - - rtrace.history = mtrace - - return base.MultiTrace([rtrace]) - - def load_result_trace(self, model=None): - """ - Load pymc3 format trace object to be used for plotting and summary. - - Parameters - ---------- - model : Model - If None, the model is taken from the `with` context. - - Returns - ------- - MutliTrace - """ - rtrace = self.load_multitrace(stage_number=-2, model=model) - rtrace.history = self.load_multitrace(stage_number=-1, model=model) - return rtrace - - -class TextChain(BaseSMCTrace): - """Text trace object - - Parameters - ---------- - - name : str - Name of directory to store text files - model : Model - If None, the model is taken from the `with` context. - vars : list of variables - Sampling values will be stored for these variables. If None, - `model.unobserved_RVs` is used. - """ - - def __init__(self, name, model=None, vars=None): - if not os.path.exists(name): - os.mkdir(name) - super(TextChain, self).__init__(name, model, vars) - - self.flat_names = {v: ttab.create_flat_names(v, shape) - for v, shape in self.var_shapes.items()} - - self.filename = None - self.df = None - self.corrupted_flag = False - - def setup(self, draws, chain): - """Perform chain-specific setup. - - Parameters - ---------- - draws : int - Expected number of draws - chain : int - Chain number - """ - self.chain = chain - self.filename = os.path.join(self.name, 'chain_{}.csv'.format(chain)) - - cnames = [fv for v in self.varnames for fv in self.flat_names[v]] - - if os.path.exists(self.filename): - os.remove(self.filename) - - with open(self.filename, 'w') as fh: - fh.write(','.join(cnames) + '\n') - - def record(self, lpoint): - """Record results of a sampling iteration. - - Parameters - ---------- - lpoint : List of variable values - Values mapped to variable names - """ - columns = itertools.chain.from_iterable(map(str, value.ravel()) for value in lpoint) - with open(self.filename, 'a') as fh: - fh.write(','.join(columns) + '\n') - - def _load_df(self): - if self.df is None: - try: - self.df = pd.read_csv(self.filename) - except pd.parser.EmptyDataError: - pm._log.warn('Trace %s is empty and needs to be resampled!' % self.filename) - os.remove(self.filename) - self.corrupted_flag = True - except pd.io.common.CParserError: - pm._log.warn('Trace %s has wrong size!' % self.filename) - self.corrupted_flag = True - os.remove(self.filename) - - def __len__(self): - if self.filename is None: - return 0 - - self._load_df() - - if self.df is None: - return 0 - else: - return self.df.shape[0] - - def get_values(self, varname, burn=0, thin=1): - """Get values from trace. - - Parameters - ---------- - varname : str - Variable name for which values are to be retrieved. - burn : int - Burn-in samples from trace. This is the number of samples to be - thrown out from the start of the trace - thin : int - Nuber of thinning samples. 
Throw out every 'thin' sample of the - trace. - - Returns - ------- - - :class:`numpy.array` - """ - self._load_df() - var_df = self.df[self.flat_names[varname]] - shape = (self.df.shape[0],) + self.var_shapes[varname] - vals = var_df.values.ravel().reshape(shape) - return vals[burn::thin] - - def _slice(self, idx): - if idx.stop is not None: - raise ValueError('Stop value in slice not supported.') - return ndarray._slice_as_ndarray(self, idx) - - def point(self, idx): - """Get point of current chain with variables names as keys. - - Parameters - ---------- - idx : int - Index of the nth step of the chain - - Returns - ------- - dictionary of point values - """ - idx = int(idx) - self._load_df() - pt = {} - for varname in self.varnames: - vals = self.df[self.flat_names[varname]].iloc[idx].values - pt[varname] = vals.reshape(self.var_shapes[varname]) - return pt diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 9deb194454..2d3ad018a9 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -323,23 +323,12 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N if isinstance(step, pm.step_methods.smc.SMC): if step_kwargs is None: step_kwargs = {} - if chains is None: - chains = 100 - if cores is None: - cores = 1 test_folder = mkdtemp(prefix='SMC_TEST') - trace = smc.sample_smc(samples=draws, - chains=chains, + trace = smc.sample_smc(draws=draws, step=step, - start=start, - homepath=step_kwargs.get('homepath', test_folder), - stage=step_kwargs.get('stage', 0), - cores=cores, progressbar=progressbar, model=model, - random_seed=random_seed, - rm_flag=step_kwargs.get('rm_flag', True), - **kwargs) + random_seed=random_seed) else: if cores is None: cores = min(4, _cpu_count()) diff --git a/pymc3/step_methods/smc.py b/pymc3/step_methods/smc.py index 8ec76efd43..548bf605cb 100644 --- a/pymc3/step_methods/smc.py +++ b/pymc3/step_methods/smc.py @@ -1,702 +1,259 @@ -"""Sequential Monte Carlo sampler also known as -Adaptive Transitional Markov Chain Monte Carlo sampler. - -Runs on any pymc3 model. - -Created on March, 2016 - -Various significant updates July, August 2016 - -Made pymc3 compatible November 2016 -Renamed to SMC and further improvements March 2017 - -@author: Hannes Vasyura-Bathke +""" +Sequential Monte Carlo sampler """ import numpy as np +import theano import pymc3 as pm from tqdm import tqdm +import warnings -import theano - +from .arraystep import metrop_select +from .metropolis import MultivariateNormalProposal +from ..theanof import floatX, make_shared_replacements, join_nonshared_inputs from ..model import modelcontext -from ..vartypes import discrete_types -from ..theanof import inputvars, make_shared_replacements, join_nonshared_inputs -import numpy.random as nr +from ..backends.ndarray import NDArray +from ..backends.base import MultiTrace -from .metropolis import MultivariateNormalProposal -from .arraystep import metrop_select -from ..backends import smc_text as atext __all__ = ['SMC', 'sample_smc'] proposal_dists = {'MultivariateNormal': MultivariateNormalProposal} -def choose_proposal(proposal_name, scale=1.): - """Initialize and select proposal distribution. - - Parameters - ---------- - proposal_name : string - Name of the proposal distribution to initialize - scale : float or :class:`numpy.ndarray` - - Returns - ------- - class:`pymc3.Proposal` Object +class SMC(): """ - return proposal_dists[proposal_name](scale) - - -class SMC(atext.ArrayStepSharedLLK): - """Adaptive Transitional Markov-Chain Monte-Carlo sampler class. 
- - Creates initial samples and framework around the (C)ATMIP parameters + Sequential Monte Carlo step Parameters ---------- - vars : list - List of variables for sampler - out_vars : list - List of output variables for trace recording. If empty unobserved_RVs are taken. n_steps : int - The number of steps of a Markov Chain. If `tune_interval > 0` `n_steps` will be used for - the first and last stages, and the number of steps of the intermediate states will be - determined automatically. Otherwise, if `tune_interval = 0`, `n_steps` will be used for - all stages. + The number of steps of a Markov Chain. If `tune == True` `n_steps` will be used for + the first stage, and the number of steps of the other states will be determined + automatically based on the acceptance rate and `p_acc_rate`. scaling : float Factor applied to the proposal distribution i.e. the step size of the Markov Chain. Only - works if `tune_interval=0` otherwise it will be determined automatically + works if `tune == False` otherwise is determined automatically p_acc_rate : float - Probability of not accepting a step. Used to compute `n_steps` when `tune_interval > 0`. - It should be between 0 and 1. - covariance : :class:`numpy.ndarray` - (chains x chains) - Initial Covariance matrix for proposal distribution, if None - identity matrix taken - likelihood_name : string - name of the :class:`pymc3.deterministic` variable that contains the model likelihood. - Defaults to 'l_like__' + Probability of not accepting a Markov Chain proposal. Used to compute `n_steps` when + `tune == True`. It should be between 0 and 1. proposal_name : - Type of proposal distribution, see smc.proposal_dists.keys() for options - tune_interval : int - Number of steps to tune for. If tune=0 no tunning will be used. Default 10. SMC tunes two - related quantities, the scaling of the proposal distribution (i.e. the step size of Markov - Chain) and the number of steps of a Markov Chain (i.e. `n_steps`). + Type of proposal distribution. Currently the only valid option is `MultivariateNormal`. threshold : float Determines the change of beta from stage to stage, i.e.indirectly the number of stages, - the higher the value of threshold the higher the number of stage. Defaults to 0.5. + the higher the value of threshold the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. - check_bound : boolean - Check if current sample lies outside of variable definition speeds up computation as the - forward model wont be executed. Default: True model : :class:`pymc3.Model` Optional model for sampling step. Defaults to None (taken from context). - random_seed : int - Optional to set the random seed. Necessary for initial population. References ---------- + .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013), + Bayesian inversion for finite fault earthquake source models I- Theory and algorithm. + Geophysical Journal International, 2013, 194(3), pp.1701-1726, + `link `__ + .. [Ching2007] Ching, J. and Chen, Y. (2007). Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816), 816-832. 
`link `__ """ - default_blocked = True - - def __init__(self, vars=None, out_vars=None, n_steps=25, scaling=1., p_acc_rate=0.001, - covariance=None, likelihood_name='l_like__', proposal_name='MultivariateNormal', - tune_interval=10, threshold=0.5, check_bound=True, model=None, random_seed=-1): - - if random_seed != -1: - nr.seed(random_seed) - - model = modelcontext(model) - - if vars is None: - vars = model.vars - - vars = inputvars(vars) - if out_vars is None: - if not any(likelihood_name == RV.name for RV in model.unobserved_RVs): - pm._log.info('Adding model likelihood to RVs!') - with model: - llk = pm.Deterministic(likelihood_name, model.logpt) - else: - pm._log.info('Using present model likelihood!') + def __init__(self, n_steps=5, scaling=1., p_acc_rate=0.01, tune=True, + proposal_name='MultivariateNormal', threshold=0.5): - out_vars = model.unobserved_RVs - - out_varnames = [out_var.name for out_var in out_vars] - - if covariance is None and proposal_name == 'MultivariateNormal': - self.covariance = np.eye(sum(v.dsize for v in vars)) - scale = self.covariance - elif covariance is None: - scale = np.ones(sum(v.dsize for v in vars)) - else: - scale = covariance - - self.vars = vars - self.proposal_name = proposal_name - self.proposal_dist = choose_proposal(self.proposal_name, scale=scale) - self.scaling = np.atleast_1d(scaling) - self.check_bnd = check_bound - self.tune_interval = tune_interval - self.steps_until_tune = tune_interval - self.population = [model.test_point] self.n_steps = n_steps - self.n_steps_final = n_steps + self.scaling = scaling self.p_acc_rate = p_acc_rate - self.stage_sample = 0 - self.accepted = 0 - self.beta = 0 - #self.sjs = 1 - self.stage = 0 - self.chain_index = 0 + self.tune = tune + self.proposal = proposal_dists[proposal_name] self.threshold = threshold - self.likelihood_name = likelihood_name - self._llk_index = out_varnames.index(likelihood_name) - self.discrete = np.concatenate([[v.dtype in discrete_types] * (v.dsize or 1) for v in vars]) - self.any_discrete = self.discrete.any() - self.all_discrete = self.discrete.all() - - shared = make_shared_replacements(vars, model) - self.logp_forw = logp_forw(out_vars, vars, shared) - self.check_bnd = logp_forw([model.varlogpt], vars, shared) - super(SMC, self).__init__(vars, out_vars, shared) - def astep(self, q0): - if self.stage == 0: - l_new = self.logp_forw(q0) - q_new = q0 - - else: - if not self.steps_until_tune and self.tune_interval: - - # Tune scaling parameter - acc_rate = self.accepted / float(self.tune_interval) - self.scaling = tune(acc_rate) - # compute n_steps - if self.accepted == 0: - acc_rate = 1 / float(self.tune_interval) - self.n_steps = 1 + (np.ceil(np.log(self.p_acc_rate) / - np.log(1 - acc_rate)).astype(int)) - # Reset counter - self.steps_until_tune = self.tune_interval - self.accepted = 0 - self.stage_sample = 0 - - if not self.stage_sample: - self.proposal_samples_array = self.proposal_dist(self.n_steps) - - delta = self.proposal_samples_array[self.stage_sample, :] * self.scaling - - if self.any_discrete: - if self.all_discrete: - delta = np.round(delta, 0) - q0 = q0.astype(int) - q = (q0 + delta).astype(int) - else: - delta[self.discrete] = np.round(delta[self.discrete], 0).astype(int) - q = q0 + delta - q = q[self.discrete].astype(int) - else: - q = q0 + delta - - l0 = self.chain_previous_lpoint[self.chain_index] - - if self.check_bnd: - varlogp = self.check_bnd(q) - - if np.isfinite(varlogp): - logp = self.logp_forw(q) - q_new, accepted = metrop_select( - self.beta * 
(logp[self._llk_index] - l0[self._llk_index]), q, q0) - - if accepted: - self.accepted += 1 - l_new = logp - self.chain_previous_lpoint[self.chain_index] = l_new - else: - l_new = l0 - else: - q_new = q0 - l_new = l0 - - else: - logp = self.logp_forw(q) - q_new, accepted = metrop_select( - self.beta * (logp[self._llk_index] - l0[self._llk_index]), q, q0) - - if accepted: - self.accepted += 1 - l_new = logp - self.chain_previous_lpoint[self.chain_index] = l_new - else: - l_new = l0 - - self.steps_until_tune -= 1 - self.stage_sample += 1 - - # reset sample counter - if self.stage_sample == self.n_steps: - self.stage_sample = 0 - - return q_new, l_new - - def calc_beta(self): - """Calculate next tempering beta and importance weights based on current beta and sample - likelihoods. - - Returns - ------- - beta(m+1) : scalar, float - tempering parameter of the next stage - beta(m) : scalar, float - tempering parameter of the current stage - weights : :class:`numpy.ndarray` - Importance weights (floats) - """ - low_beta = old_beta = self.beta - up_beta = 2. - rN = int(len(self.likelihoods) * self.threshold) - - while up_beta - low_beta > 1e-6: - new_beta = (low_beta + up_beta) / 2. - weights_un = np.exp((new_beta - old_beta) * (self.likelihoods - self.likelihoods.max())) - - weights = weights_un / np.sum(weights_un) - ESS = int(1 / np.sum(weights ** 2)) - #ESS = int(1 / np.max(weights)) - if ESS == rN: - break - elif ESS < rN: - up_beta = new_beta - else: - low_beta = new_beta - - return new_beta, old_beta, weights#, np.mean(weights_un) - - def calc_covariance(self): - """Calculate trace covariance matrix based on importance weights. - - Returns - ------- - cov : :class:`numpy.ndarray` - weighted covariances (NumPy > 1.10. required) - """ - cov = np.cov(self.array_population, aweights=self.weights.ravel(), bias=False, rowvar=0) - if np.isnan(cov).any() or np.isinf(cov).any(): - raise ValueError('Sample covariances not valid! Likely "chains" is too small!') - return np.atleast_2d(cov) - - def select_end_points(self, mtrace, chains): - """Read trace results (variables and model likelihood) and take end points for each chain - and set as start population for the next stage. - - Parameters - ---------- - mtrace : :class:`.base.MultiTrace` - - Returns - ------- - population : list - of :func:`pymc3.Point` dictionaries - array_population : :class:`numpy.ndarray` - Array of trace end-points - likelihoods : :class:`numpy.ndarray` - Array of likelihoods of the trace end-points - """ - array_population = np.zeros((chains, self.ordering.size)) - n_steps = len(mtrace) - - # collect end points of each chain and put into array - for var, slc, shp, _ in self.ordering.vmap: - slc_population = mtrace.get_values(varname=var, burn=n_steps - 1, combine=True) - if len(shp) == 0: - array_population[:, slc] = np.atleast_2d(slc_population).T - else: - array_population[:, slc] = slc_population - # get likelihoods - likelihoods = mtrace.get_values(varname=self.likelihood_name, - burn=n_steps - 1, combine=True) - - # map end array_endpoints to dict points - population = [self.bij.rmap(row) for row in array_population] - - return population, array_population, likelihoods - - def get_chain_previous_lpoint(self, mtrace, chains): - """Read trace results and take end points for each chain and set as previous chain result - for comparison of metropolis select. 
- - Parameters - ---------- - mtrace : :class:`.base.MultiTrace` - - Returns - ------- - chain_previous_lpoint : list - all unobservedRV values, including dataset likelihoods - """ - array_population = np.zeros((chains, self.lordering.size)) - n_steps = len(mtrace) - for _, slc, shp, _, var in self.lordering.vmap: - slc_population = mtrace.get_values(varname=var, burn=n_steps - 1, combine=True) - if len(shp) == 0: - array_population[:, slc] = np.atleast_2d(slc_population).T - else: - array_population[:, slc] = slc_population - - return [self.lij.rmap(row) for row in array_population[self.resampling_indexes, :]] - - def mean_end_points(self): - """Calculate mean of the end-points and return point. - - Returns - ------- - Dictionary of trace variables - """ - return self.bij.rmap(self.array_population.mean(axis=0)) - - def resample(self, chains): - """Resample pdf based on importance weights. based on Kitagawas deterministic resampling - algorithm. - - Returns - ------- - outindex : :class:`numpy.ndarray` - Array of resampled trace indexes - """ - parents = np.arange(chains) - N_childs = np.zeros(chains, dtype=int) - - cum_dist = np.cumsum(self.weights) - u = (parents + np.random.rand()) / chains - j = 0 - for i in parents: - while u[i] > cum_dist[j]: - j += 1 - - N_childs[j] += 1 - - indx = 0 - outindx = np.zeros(chains, dtype=int) - for i in parents: - if N_childs[i] > 0: - for j in range(indx, (indx + N_childs[i])): - outindx[j] = parents[i] - - indx += N_childs[i] - - return outindx - - -def sample_smc(samples=1000, chains=100, step=None, start=None, homepath=None, stage=0, cores=1, - progressbar=False, model=None, random_seed=-1, rm_flag=True, **kwargs): - """Sequential Monte Carlo sampling - - Samples the parameter space using a `chains` number of parallel Metropolis chains. - Once finished, the sampled traces are evaluated: - - (1) Based on the likelihoods of the final samples, chains are weighted - (2) the weighted covariance of the ensemble is calculated and set as new proposal distribution - (3) the variation in the ensemble is calculated and also the next tempering parameter (`beta`) - (4) New `chains` Markov chains are seeded on the traces with high weight for a given number of - iterations, the iterations can be computed automatically. - (5) Repeat until `beta` > 1. +def sample_smc(draws=5000, step=None, progressbar=False, model=None, random_seed=-1): + """ + Sequential Monte Carlo sampling Parameters ---------- - samples : int - The number of samples to draw from the posterior (i.e. last stage). Defaults to 1000. - chains : int - Number of chains used to store samples in backend. + draws : int + The number of samples to draw from the posterior (i.e. last stage). And also the number of + independent Markov Chains. Defaults to 5000. step : :class:`SMC` SMC initialization object - start : List of dictionaries - with length of (`chains`). Starting points in parameter space (or partial point) - Defaults to random draws from variables (defaults to empty dict) - homepath : string - Result_folder for storing stages, will be created if not existing. - stage : int - Stage where to start or continue the calculation. It is possible to continue after - completed stages (`stage` should be the number of the completed stage + 1). If None the - start will be at `stage=0`. - cores : int - The number of cores to be used in parallel. Be aware that Theano has internal - parallelization. Sometimes this is more efficient especially for simple models. - `chains / cores` has to be an integer number! 
progressbar : bool Flag for displaying a progress bar - model : :class:`pymc3.Model` - (optional if in `with` context) has to contain deterministic variable name defined under - `step.likelihood_name` that contains the model likelihood - random_seed : int or list of ints - A list is accepted, more if `cores` is greater than one. - rm_flag : bool - If True existing stage result folders are being deleted prior to sampling. - - References - ---------- - .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013), - Bayesian inversion for finite fault earthquake source models I- Theory and algorithm. - Geophysical Journal International, 2013, 194(3), pp.1701-1726, - `link `__ + model : pymc3 Model + optional if in `with` context + random_seed : int + random seed """ - + warnings.warn("Warning: SMC is experimental, hopefully it will be ready for PyMC 3.6") model = modelcontext(model) if random_seed != -1: - nr.seed(random_seed) - - if homepath is None: - raise TypeError('Argument `homepath` should be path to result_directory.') - - if cores > 1: - if not (chains / float(cores)).is_integer(): - raise TypeError('chains / cores has to be a whole number!') - - if not any(step.likelihood_name in var.name for var in model.deterministics): - raise TypeError('Model (deterministic) variables need to contain a variable {} as defined ' - 'in `step`.'.format(step.likelihood_name)) - - stage_handler = atext.TextStage(homepath) - - if progressbar and cores > 1: - progressbar = False - - if stage == 0: - # continue or start initial stage - step.stage = stage - draws = 1 - else: - step = stage_handler.load_atmip_params(stage, model=model) - draws = step.n_steps - - stage_handler.clean_directory(stage, None, rm_flag) - - x_chains = stage_handler.recover_existing_results(stage, draws, chains, step) - - step.resampling_indexes = np.arange(chains) - step.proposal_samples_array = step.proposal_dist(chains) - - if start is not None: - if len(start) != chains: - raise TypeError('Argument `start` should have dicts equal the ' - 'number of chains (`chains`)') - else: - step.population = start - else: - step.population = _initial_population(samples, chains, model, step.vars) - - with model: - while step.beta < 1: - if step.stage == 0: - # Initial stage - pm._log.info('Sample initial stage: ...') - draws = 1 - else: - draws = step.n_steps - - pm._log.info('Beta: %f Stage: %i' % (step.beta, step.stage)) - - # Metropolis sampling intermediate stages - x_chains = stage_handler.clean_directory(step.stage, x_chains, rm_flag) - sample_args = {'draws': draws, - 'step': step, - 'stage_path': stage_handler.stage_path(step.stage), - 'progressbar': progressbar, - 'model': model, - 'n_jobs': cores, - 'x_chains': x_chains, - 'chains': chains} - - _iter_parallel_chains(**sample_args) - - mtrace = stage_handler.load_multitrace(step.stage) - - step.population, step.array_population, step.likelihoods = step.select_end_points( - mtrace, chains) - - step.beta, step.old_beta, step.weights = step.calc_beta() - #step.beta, step.old_beta, step.weights, sj = step.calc_beta() - #step.sjs *= sj - - if step.beta > 1.: - pm._log.info('Beta > 1.: %f' % step.beta) - step.beta = 1. 
- stage_handler.dump_atmip_params(step) - if stage == -1: - x_chains = [] + np.random.seed(random_seed) + + beta = 0 + stage = 0 + acc_rate = 1 + model.marginal_likelihood = 1 + variables = model.vars + discrete = np.concatenate([[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in variables]) + any_discrete = discrete.any() + all_discrete = discrete.all() + shared = make_shared_replacements(variables, model) + prior_logp = logp_forw([model.varlogpt], variables, shared) + likelihood_logp = logp_forw([model.datalogpt], variables, shared) + + pm._log.info('Sample initial stage: ...') + posterior, var_info = _initial_population(draws, model, variables) + + while beta < 1: + # compute plausibility weights (measure fitness) + likelihoods = np.array([likelihood_logp(sample) for sample in posterior]).squeeze() + beta, old_beta, weights, sj = _calc_beta(beta, likelihoods, step.threshold) + model.marginal_likelihood *= sj + # resample based on plausibility weights (selection) + resampling_indexes = np.random.choice(np.arange(draws), size=draws, p=weights) + posterior = posterior[resampling_indexes] + likelihoods = likelihoods[resampling_indexes] + + # compute proposal distribution based on weights + covariance = _calc_covariance(posterior, weights) + proposal = step.proposal(covariance) + + # compute scaling and number of Markov chains steps (optional), based on previous + # acceptance rate + if step.tune and stage > 0: + if acc_rate == 0: + acc_rate = 1. / step.n_steps + step.scaling = _tune(acc_rate) + step.n_steps = 1 + int(np.log(step.p_acc_rate) / np.log(1 - acc_rate)) + + pm._log.info('Stage: {:d} Beta: {:f} Steps: {:d} Acc: {:f}'.format(stage, beta, + step.n_steps, acc_rate)) + # Apply Metropolis kernel (mutation) + proposed = 0. + accepted = 0. + priors = np.array([prior_logp(sample) for sample in posterior]).squeeze() + tempered_post = priors + likelihoods * beta + for draw in tqdm(range(draws), disable=not progressbar): + old_tempered_post = tempered_post[draw] + q_old = posterior[draw] + deltas = np.squeeze(proposal(step.n_steps) * step.scaling) + for n_step in range(0, step.n_steps): + delta = deltas[n_step] + + if any_discrete: + if all_discrete: + delta = np.round(delta, 0).astype('int64') + q_old = q_old.astype('int64') + q_new = (q_old + delta).astype('int64') + else: + delta[discrete] = np.round(delta[discrete], 0) + q_new = (q_old + delta) else: - x_chains = None - else: - step.covariance = step.calc_covariance() - step.proposal_dist = choose_proposal(step.proposal_name, scale=step.covariance) - step.resampling_indexes = step.resample(chains) - step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace, chains) + q_new = floatX(q_old + delta) - stage_handler.dump_atmip_params(step) + new_tempered_post = prior_logp(q_new) + likelihood_logp(q_new)[0] * beta - step.stage += 1 + q_old, accept = metrop_select(new_tempered_post - old_tempered_post, q_new, q_old) + if accept: + accepted += accept + posterior[draw] = q_old + old_tempered_post = new_tempered_post + proposed += 1. 
- # Metropolis sampling final stage - pm._log.info('Sample final stage') - step.stage = -1 - x_chains = stage_handler.clean_directory(step.stage, x_chains, rm_flag) - weights_un = np.exp((1 - step.old_beta) * (step.likelihoods - step.likelihoods.max())) - step.weights = weights_un / np.sum(weights_un) - step.covariance = step.calc_covariance() - step.proposal_dist = choose_proposal(step.proposal_name, scale=step.covariance) - step.resampling_indexes = step.resample(chains) - step.chain_previous_lpoint = step.get_chain_previous_lpoint(mtrace, chains) + acc_rate = accepted / proposed + stage += 1 - x_chains = nr.randint(0, chains, size=samples) + trace = _posterior_to_trace(posterior, model, var_info) - sample_args['draws'] = step.n_steps_final - sample_args['step'] = step - sample_args['stage_path'] = stage_handler.stage_path(step.stage) - sample_args['x_chains'] = x_chains - _iter_parallel_chains(**sample_args) + return trace - stage_handler.dump_atmip_params(step) - #model.marginal_likelihood = step.sjs - return stage_handler.create_result_trace(step.stage, - step=step, - model=model) - - -def _initial_population(samples, chains, model, variables): +def _initial_population(draws, model, variables): """ Create an initial population from the prior """ population = [] - init_rnd = {} + var_info = {} start = model.test_point + init_rnd = pm.sample_prior_predictive(draws, model=model) for v in variables: - if pm.util.is_transformed_name(v.name): - trans = v.distribution.transform_used.forward_val - init_rnd[v.name] = trans(v.distribution.dist.random(size=chains, point=start)) - else: - init_rnd[v.name] = v.random(size=chains, point=start) - - for i in range(chains): - population.append(pm.Point({v.name: init_rnd[v.name][i] for v in variables}, model=model)) - - return population - - -def _sample(draws, step=None, start=None, trace=None, chain=0, progressbar=True, model=None, - random_seed=-1, chain_idx=0): + var_info[v.name] = (start[v.name].shape, start[v.name].size) - sampling = _iter_sample(draws, step, start, trace, chain, model, random_seed, chain_idx) - - if progressbar: - sampling = tqdm(sampling, total=draws) - - try: - for strace in sampling: - pass - - except KeyboardInterrupt: - pass + for i in range(draws): + point = pm.Point({v.name: init_rnd[v.name][i] for v in variables}, model=model) + population.append(model.dict_to_array(point)) - return chain + return np.array(population), var_info -def _iter_sample(draws, step, start=None, trace=None, chain=0, model=None, random_seed=-1, chain_idx=0): +def _calc_beta(beta, likelihoods, threshold=0.5): """ - Modified from :func:`pymc3.sampling._iter_sample` to be more efficient with SMC algorithm. - """ - model = modelcontext(model) - draws = int(draws) - if draws < 1: - raise ValueError('Argument `draws` should be above 0.') - - if start is None: - start = {} - - if random_seed != -1: - nr.seed(random_seed) - - try: - step = pm.step_methods.CompoundStep(step) - except TypeError: - pass - - point = pm.Point(start, model=model) - step.chain_index = chain - trace.setup(draws, chain_idx) - for i in range(draws): - point, out_list = step.step(point) - trace.record(out_list) - yield trace - - -def _work_chain(work): - """Wrapper function for parallel execution of _sample i.e. the Markov Chains. + Calculate next inverse temperature (beta) and importance weights based on current beta + and tempered likelihood. Parameters ---------- - work : List - Containing all the information that is unique for each Markov Chain - i.e. 
[:class:'SMC', chain_number(int), sampling index(int), start_point(dictionary)] + beta : float + tempering parameter of current stage + likelihoods : numpy array + likelihoods computed from the model + threshold : float + Determines the change of beta from stage to stage, i.e.indirectly the number of stages, + the higher the value of threshold the higher the number of stage. Defaults to 0.5. + It should be between 0 and 1. Returns ------- - chain : int - Index of chain that has been sampled + new_beta : float + tempering parameter of the next stage + old_beta : float + tempering parameter of the current stage + weights : numpy array + Importance weights (floats) + sj : float + Partial marginal likelihood """ - return _sample(*work) + low_beta = old_beta = beta + up_beta = 2. + rN = int(len(likelihoods) * threshold) + + while up_beta - low_beta > 1e-6: + new_beta = (low_beta + up_beta) / 2. + weights_un = np.exp((new_beta - old_beta) * (likelihoods - likelihoods.max())) + weights = weights_un / np.sum(weights_un) + ESS = int(1 / np.sum(weights ** 2)) + if ESS == rN: + break + elif ESS < rN: + up_beta = new_beta + else: + low_beta = new_beta + if new_beta >= 1: + new_beta = 1 + sj = np.exp((new_beta - old_beta) * likelihoods) + weights_un = np.exp((new_beta - old_beta) * (likelihoods - likelihoods.max())) + weights = weights_un / np.sum(weights_un) + return new_beta, old_beta, weights, np.mean(sj) -def _iter_parallel_chains(draws, step, stage_path, progressbar, model, n_jobs, chains, - x_chains=None): - """Do Metropolis sampling over all the x_chains with each chain being sampled 'draws' times. - Parallel execution according to n_jobs. +def _calc_covariance(posterior_array, weights): """ - if x_chains is None: - x_chains = range(chains) - - chain_idx = range(0, len(x_chains)) - pm._log.info('Initializing chain traces ...') - - max_int = np.iinfo(np.int32).max - - random_seeds = nr.randint(1, max_int, size=len(x_chains)) - pm._log.info('Sampling ...') - - work = [(draws, - step, - step.population[step.resampling_indexes[chain]], - atext.TextChain(stage_path, model=model), - chain, - False, - model, - rseed, - chain_idx) for chain, rseed, chain_idx in zip(x_chains, random_seeds, chain_idx)] - - if draws < 10: - chunksize = n_jobs - else: - chunksize = 1 - - p = atext.paripool(_work_chain, work, chunksize=chunksize, nprocs=n_jobs) - - if n_jobs == 1 and progressbar: - p = tqdm(p, total=len(x_chains)) - - for _ in p: - pass + Calculate trace covariance matrix based on importance weights. + """ + cov = np.cov(np.squeeze(posterior_array), aweights=weights.ravel(), bias=False, rowvar=0) + if np.isnan(cov).any() or np.isinf(cov).any(): + raise ValueError('Sample covariances not valid! Likely "chains" is too small!') + return np.atleast_2d(cov) -def tune(acc_rate): - """Tune adaptively based on the acceptance rate. +def _tune(acc_rate): + """ + Tune adaptively based on the acceptance rate. 
Parameters ---------- @@ -713,6 +270,27 @@ def tune(acc_rate): return (a + b * acc_rate) ** 2 +def _posterior_to_trace(posterior, model, var_info): + """ + Save results into a PyMC3 trace + """ + lenght_pos = len(posterior) + varnames = [v.name for v in model.vars] + + with model: + strace = NDArray(model) + strace.setup(lenght_pos, 0) + for i in range(lenght_pos): + value = [] + size = 0 + for var in varnames: + shape, new_size = var_info[var] + value.append(posterior[i][size:size+new_size].reshape(shape)) + size += new_size + strace.record({k: v for k, v in zip(varnames, value)}) + return MultiTrace([strace]) + + def logp_forw(out_vars, vars, shared): """Compile Theano function of the model and the input and output variables. diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py index ceea22c790..90732f8645 100644 --- a/pymc3/tests/test_smc.py +++ b/pymc3/tests/test_smc.py @@ -1,24 +1,15 @@ import pymc3 as pm import numpy as np -from pymc3.backends.smc_text import TextStage -import pytest -from tempfile import mkdtemp -import shutil import theano.tensor as tt -import theano from .helpers import SeededTest -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") class TestSMC(SeededTest): def setup_class(self): super(TestSMC, self).setup_class() - self.test_folder = mkdtemp(prefix='ATMIP_TEST') - - self.samples = 1500 - self.chains = 200 + self.samples = 1000 n = 4 mu1 = np.ones(n) * (1. / 2) mu2 = - mu1 @@ -40,44 +31,35 @@ def two_gaussians(x): - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2) return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2)) - with pm.Model() as self.ATMIP_test: + with pm.Model() as self.SMC_test: X = pm.Uniform('X', lower=-2, upper=2., shape=n) llk = pm.Potential('muh', two_gaussians(X)) self.muref = mu1 - @pytest.mark.parametrize(['cores', 'stage'], [[1, 0], [2, 6]]) - def test_sample_n_core(self, cores, stage): - step_kwargs = {'homepath': self.test_folder, 'stage': stage} - with self.ATMIP_test: + def test_sample(self): + with self.SMC_test: mtrace = pm.sample(draws=self.samples, - chains=self.chains, - cores=cores, - step = pm.SMC(), - step_kwargs=step_kwargs) + step = pm.SMC()) - x = mtrace.get_values('X') + x = mtrace['X'] mu1d = np.abs(x).mean(axis=0) np.testing.assert_allclose(self.muref, mu1d, rtol=0., atol=0.03) - # Scenario IV Ching, J. & Chen, Y. 2007 - #assert np.round(np.log(self.ATMIP_test.marginal_likelihood)) == -12.0 - - def test_stage_handler(self): - stage_number = -1 - stage_handler = TextStage(self.test_folder) - step = stage_handler.load_atmip_params(stage_number, model=self.ATMIP_test) - assert step.stage == stage_number + def test_ml(self): + data = np.repeat([1, 0], [50, 50]) + marginals = [] + a_prior_0, b_prior_0 = 1., 1. + a_prior_1, b_prior_1 = 20., 20. 
- corrupted_chains = stage_handler.recover_existing_results(stage_number, - self.samples / self.chains, - self.chains, - step, - model=self.ATMIP_test) - assert len(corrupted_chains) == self.chains + for alpha, beta in ((a_prior_0, b_prior_0), (a_prior_1, b_prior_1)): + with pm.Model() as model: + a = pm.Beta('a', alpha, beta) + y = pm.Bernoulli('y', a, observed=data) + trace = pm.sample(2000, step=pm.SMC()) + marginals.append(model.marginal_likelihood) + # compare to the analytical result + assert abs((marginals[1] / marginals[0]) - 4.0) <= 1 - rtrace = stage_handler.load_result_trace(model=self.ATMIP_test) - def teardown_class(self): - shutil.rmtree(self.test_folder) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index c352f95d56..2372803b9c 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -9,7 +9,7 @@ from pymc3.step_methods import (NUTS, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Metropolis, Slice, CompoundStep, NormalProposal, MultivariateNormalProposal, HamiltonianMC, - EllipticalSlice, smc, DEMetropolis) + EllipticalSlice, SMC, DEMetropolis) from pymc3.theanof import floatX from pymc3.distributions import ( Binomial, Normal, Bernoulli, Categorical, Beta, HalfNormal) @@ -25,139 +25,136 @@ class TestStepMethods(object): # yield test doesn't work subclassing object master_samples = { - Slice: np.array([ - -5.95252353e-01, -1.81894861e-01, -4.98211488e-01, - -1.02262800e-01, -4.26726030e-01, 1.75446860e+00, - -1.30022548e+00, 8.35658004e-01, 8.95879638e-01, - -8.85214481e-01, -6.63530918e-01, -8.39303080e-01, - 9.42792225e-01, 9.03554344e-01, 8.45254684e-01, - -1.43299803e+00, 9.04897201e-01, -1.74303131e-01, - -6.38611581e-01, 1.50013968e+00, 1.06864438e+00, - -4.80484421e-01, -7.52199709e-01, 1.95067495e+00, - -3.67960104e+00, 2.49291588e+00, -2.11039152e+00, - 1.61674758e-01, -1.59564182e-01, 2.19089873e-01, - 1.88643940e+00, 4.04098154e-01, -4.59352326e-01, - -9.06370675e-01, 5.42817654e-01, 6.99040611e-03, - 1.66396391e-01, -4.74549281e-01, 8.19064437e-02, - 1.69689952e+00, -1.62667304e+00, 1.61295808e+00, - 1.30099144e+00, -5.46722750e-01, -7.87745494e-01, - 7.91027521e-01, -2.35706976e-02, 1.68824376e+00, - 7.10566880e-01, -7.23551374e-01, 8.85613069e-01, - -1.27300146e+00, 1.80274430e+00, 9.34266276e-01, - 2.40427061e+00, -1.85132552e-01, 4.47234196e-01, - -9.81894859e-01, -2.83399706e-01, 1.84717533e+00, - -1.58593284e+00, 3.18027270e-02, 1.40566006e+00, - -9.45758714e-01, 1.18813188e-01, -1.19938604e+00, - -8.26038466e-01, 5.03469984e-01, -4.72742758e-01, - 2.27820946e-01, -1.02608915e-03, -6.02507158e-01, - 7.72739682e-01, 7.16064505e-01, -1.63693490e+00, - -3.97161966e-01, 1.17147944e+00, -2.87796982e+00, - -1.59533297e+00, 6.73096114e-01, -3.34397247e-01, - 1.22357427e-01, -4.57299104e-02, 1.32005771e+00, - -1.29910645e+00, 8.16168850e-01, -1.47357594e+00, - 1.34688446e+00, 1.06377551e+00, 4.34296696e-02, - 8.23143354e-01, 8.40906324e-01, 1.88596864e+00, - 5.77120694e-01, 2.71732927e-01, -1.36217979e+00, - 2.41488213e+00, 4.68298379e-01, 4.86342250e-01, - -8.43949966e-01]), - HamiltonianMC: np.array([ - 0.40608634, 0.40608634, 0.04610354, -0.78588609, 0.03773683, - -0.49373368, 0.21708042, 0.21708042, -0.14413517, -0.68284611, - 0.76299659, 0.24128663, 0.24128663, -0.54835464, -0.84408365, - -0.82762589, -0.67429432, -0.67429432, -0.57900517, -0.97138029, - -0.37809745, -0.37809745, -0.19333181, -0.40329098, 0.54999765, - 1.171515 , 0.90279792, 0.90279792, 1.63830503, -0.90436674, - -0.02516293, -0.02516293, -0.22177082, 
-0.28061216, -0.10158021, - 0.0807234 , 0.16994063, 0.16994063, 0.4141503 , 0.38505666, - -0.25936504, 2.12074192, 2.24467132, 0.9628703 , -1.37044749, - 0.32983336, -0.55317525, -0.55317525, -0.40295662, -0.40295662, - -0.40295662, 0.49076931, 0.04234407, -1.0002905 , 0.99823615, - 0.99823615, 0.24915904, -0.00965061, 0.48519377, 0.21959942, - -0.93094702, -0.93094702, -0.76812553, -0.73699981, -0.91834134, - -0.91834134, 0.79522886, -0.04267669, -0.04267669, 0.51368761, - 0.51368761, 0.02255577, 0.70823409, 0.70823409, 0.73921198, - 0.30295007, 0.30295007, 0.30295007, -0.1300897 , 0.44310964, - -1.35839961, -1.55398633, -0.57323153, -0.57323153, -1.15435458, - -0.17697793, -0.17697793, 0.2925856 , -0.56119025, -0.15360141, - 0.83715916, -0.02340449, -0.02340449, -0.63074456, -0.82745942, - -0.67626237, 1.13814805, -0.81857813, -0.81857813, 0.26367166]), - Metropolis: np.array([ - 1.62434536, 1.01258895, 0.4844172, -0.58855142, 1.15626034, 0.39505344, 1.85716138, - -0.20297933, -0.20297933, -0.20297933, -0.20297933, -1.08083775, -1.08083775, - 0.06388596, 0.96474191, 0.28101405, 0.01312597, 0.54348144, -0.14369126, -0.98889691, - -0.98889691, -0.75448121, -0.94631676, -0.94631676, -0.89550901, -0.89550901, - -0.77535005, -0.15814694, 0.14202338, -0.21022647, -0.4191207, 0.16750249, 0.45308981, - 1.33823098, 1.8511608, 1.55306796, 1.55306796, 1.55306796, 1.55306796, 0.15657163, - 0.3166087, 0.3166087, 0.3166087, 0.3166087, 0.54670343, 0.54670343, 0.32437529, - 0.12361722, 0.32191694, 0.44092559, 0.56274686, 0.56274686, 0.18746191, 0.18746191, - -0.15639177, -0.11279491, -0.11279491, -0.11279491, -1.20770676, -1.03832432, - -0.29776787, -1.25146848, -1.25146848, -0.93630908, -0.5857631, -0.5857631, - -0.62445861, -0.62445861, -0.64907557, -0.64907557, -0.64907557, 0.58708846, - -0.61217957, 0.25116575, 0.25116575, 0.80170324, 1.59451011, 0.97097938, 1.77284041, - 1.81940771, 1.81940771, 1.81940771, 1.81940771, 1.95710892, 2.18960348, 2.18960348, - 2.18960348, 2.18960348, 2.63096792, 2.53081269, 2.5482221, 1.42620337, 0.90910891, - -0.08791792, 0.40729341, 0.23259025, 0.23259025, 0.23259025, 2.76091595, 2.51228118]), - NUTS: np.array( - [ 1.11832371e+00, 1.11832371e+00, 1.11203151e+00, -1.08526075e+00, - 2.58200798e-02, 2.03527183e+00, 4.47644923e-01, 8.95141642e-01, - 7.21867642e-01, 8.61681133e-01, 8.61681133e-01, 3.42001064e-01, - -1.08109692e-01, 1.89399407e-01, 2.76571728e-01, 2.76571728e-01, - -7.49542468e-01, -7.25272156e-01, -5.49940424e-01, -5.49940424e-01, - 4.39045553e-01, -9.79313191e-04, 4.08678631e-02, 4.08678631e-02, - -1.17303762e+00, 4.15335470e-01, 4.80458006e-01, 5.98022153e-02, - 5.26508851e-01, 5.26508851e-01, 6.24759070e-01, 4.55268819e-01, - 8.70608570e-01, 6.56151353e-01, 6.56151353e-01, 1.29968043e+00, - 2.41336915e-01, -7.78824784e-02, -1.15368193e+00, -4.92562283e-01, - -5.16903724e-02, 4.05389240e-01, 4.05389240e-01, 4.20147769e-01, - 6.88161155e-01, 6.59273169e-01, -4.28987827e-01, -4.28987827e-01, - -4.44203783e-01, -4.61330842e-01, -5.23216216e-01, -1.52821368e+00, - 9.84049809e-01, 9.84049809e-01, 1.02081403e+00, -5.60272679e-01, - 4.18620552e-01, 1.92542517e+00, 1.12029984e+00, 6.69152820e-01, - 1.56325611e+00, 6.64640934e-01, -7.43157898e-01, -7.43157898e-01, - -3.18049839e-01, 6.87248073e-01, 6.90665184e-01, 1.63009949e+00, - -4.84972607e-01, -1.04859669e+00, 8.26455763e-01, -1.71696305e+00, - -1.39964174e+00, -3.87677130e-01, -1.85966115e-01, -1.85966115e-01, - 4.54153291e-01, -8.41705332e-01, -8.46831314e-01, -8.46831314e-01, - -1.57419678e-01, 
-3.89604101e-01, 8.15315055e-01, 2.81141081e-03, - 2.81141081e-03, 3.25839131e-01, 1.33638823e+00, 1.59391112e+00, - -3.91174647e-01, -2.60664979e+00, -2.27637534e+00, -2.81505065e+00, - -2.24238542e+00, -1.01648100e+00, -1.01648100e+00, -7.60912865e-01, - 1.44384812e+00, 2.07355127e+00, 1.91390340e+00, 1.66559696e+00]), - smc.SMC: np.array( - [ 0.61562138, -0.56082978, -0.89760381, 1.47368457, 0.33300527, 0.85567605, - -1.33503519, -1.47996682, -0.3725601, 0.75713321, 1.81055917, 0.39193534, - 0.10083821, 0.55569412, -0.65879812, -0.61545061, -2.65522875, 0.93801687, - 2.40499211, -0.63022535, 0.09565784, -1.00650846, 1.65901231, 0.18429996, - 1.64642521, 0.5589963, -0.40452525, -0.9402324, 0.53813986, 0.55785946, - 1.22966132, 0.2782562, -0.81254158, -0.08076293, -0.29136329, 0.62914226, - 0.16049388, -0.06386387, 1.8103961, -0.98444811, -0.36333739, 0.88703339, - -0.08482673, -0.23224262, -0.11348807, 1.09401682, -0.58594449, -0.12728503, - -0.82408778, -1.82770764, -2.28859404, -0.51814943, -1.53652851, 0.66313366, - 1.61666698, 1.41284768, -0.05129251, 0.96166282, 1.00446145, -0.86380886, - -1.13410885, -0.48311125, -1.25446622, -0.48452779, -0.84647195, -0.43201729, - -1.22186151, 1.18698485, 0.33434434, -0.40650775, 0.47740064, 0.96943022, - 1.15534028, -0.86220564, -0.26049285, -1.17489183, 0.66796904, -1.68920203, - -0.96308602, -1.73542483, -0.84744376, 0.91864514, -0.02724505, 0.16143404, - 0.65747707, -1.49655923, -0.32010575, 1.20255401, 0.1203948, -1.30017822, - 1.55895643, -0.74799042, -1.5938872, 0.69297144, -1.32932843, -0.16886992, - -1.01437109, 0.32476589, 1.02509164, 0.31274278, -0.7908909, 1.18439217, - -0.96132492, -0.4934065, 0.71438293, 0.09829997, 1.81936381, 0.47941016, - 0.3717936, 0.14339747, 1.24288736, 0.92520773, 0.69025067, 0.96618094, - 0.69085402, -1.12128175, 0.11228076, 0.7711306, 0.12859226, 0.65792466, - -0.07422313, 1.74736739, 0.24120104, 0.74946338, 0.66260928, -0.34070258, - 1.09875434, -0.4159233, -0.01607339, 1.20921296, -0.29176047, 0.47367867, - -1.45788116, -0.40198772, 0.44502909, 0.65623719, 0.99422221, 1.37241668, - -0.05163759, 0.82729935, 0.59458429, 1.10870872, -1.00730291, -0.07837131, - -0.28144688, -0.03052015, 1.05263496, 0.19011829, -0.98807301, -0.77388355, - -1.68729554, 0.03018351, 0.39424573, 0.98343413, -1.40600196, 1.19764243, - 1.64712279, 0.68929684, -0.54301669, -0.29369924, 0.09052877, 2.64067523, - -1.25887138, 1.65991714, 0.71271397, -0.50396329, 1.2182173, 0.2472108, - -0.2990774, 0.1646579, 0.21418971, -0.0876372, 0.66714317, -0.43490764, - -2.17899663, -0.2681325, -3.10431098, -1.38211864, 0.02041712, 0.16319981, - -1.02526047, 1.93088335, -0.36975507, -0.61332039, 0.33666881, -0.23766903, - -0.58478679, 1.38941035, -0.45829187, -1.12505096, -1.4814355, 0.61790977, - 0.58867984, 1.38693864, 1.80845772, -1.63246225, -1.48247172, -0.69197631, - 0.65045375, -0.09601979]), + Slice: np.array([ 0.10233528, 0.40458486, 0.17329217, 0.46281232, 0.22556278, + 1.52632836, -0.27823807, 0.02539625, 1.02711735, 0.03686346, + -0.62841281, -0.27125083, 0.31989505, 0.84031155, -0.18949138, + 1.60550262, 1.01375291, -0.29742941, 0.35312738, 0.43363622, + 1.18898078, 0.80063888, 0.38445644, 0.90184395, 1.69150017, + 2.05452171, -0.13334755, 1.61265408, 1.36579345, 1.3216292 , + -0.59487037, -0.34648927, 1.05107285, 0.42870305, 0.61552257, + 0.55239884, 0.13929271, 0.26213809, -0.2316028 , 0.19711046, + 1.42832629, 1.93641434, -0.81142379, -0.31059485, -0.3189694 , + 1.43542534, 0.40311093, 1.63103768, 0.24034874, 0.33924866, + 
0.94951616, 0.71700185, 0.79273056, -0.44569146, 1.91974783, + 0.84673795, 1.12411833, -0.83123811, -0.54310095, -0.00721347, + 0.9925055 , 1.04015058, -0.34958074, -0.14926302, -0.47990225, + -0.75629446, -0.95942067, 1.68179204, 1.20598073, 1.39675733, + 1.22755935, 0.06728757, 1.05184231, 1.01126791, -0.67327093, + 0.21429651, 1.33730461, -1.56174184, -0.64348764, 0.98050636, + 0.25923049, 0.58622631, 0.46589069, 1.44367347, -0.43141573, + 1.08293374, -0.5563204 , 1.46287904, 1.26019815, 0.52972104, + 1.08792687, 1.10064358, 1.84881549, 0.91179647, 0.69316592, + -0.47657064, 2.22747063, 0.83388935, 0.84680716, -0.10556406]), + HamiltonianMC: np.array([ 0.43733634, 0.43733634, 0.15955614, -0.44355329, 0.21465731, + 0.30148244, 0.45527282, 0.45527282, 0.41753005, -0.03480236, + 1.16599611, 0.565306 , 0.565306 , 0.0077143 , -0.18291321, + -0.14577946, -0.00703353, -0.00703353, 0.14345194, -0.12345058, + 0.76875516, 0.76875516, 0.84289506, 0.24596225, 0.95287087, + 1.3799335 , 1.1493899 , 1.1493899 , 2.0255982 , -0.77850273, + 0.11604115, 0.11604115, 0.39296557, 0.34826491, 0.5951183 , + 0.63097341, 0.57938784, 0.57938784, 0.76570029, 0.63516046, + 0.23667784, 2.0151377 , 1.92064966, 1.09125654, -0.43716787, + 0.61939595, 0.30566853, 0.30566853, 0.3690641 , 0.3690641 , + 0.3690641 , 1.26497542, 0.90890334, 0.01482818, 0.01482818, + -0.15542473, 0.26475651, 0.32687263, 1.21902207, 0.6708017 , + -0.18867695, -0.18867695, -0.07141329, -0.04631175, -0.16855462, + -0.16855462, 1.05455573, 0.47371825, 0.47371825, 0.86307077, + 0.86307077, 0.51484125, 1.0022533 , 1.0022533 , 1.02370316, + 0.71331829, 0.71331829, 0.71331829, 0.40758664, 0.81307434, + -0.46269741, -0.60284666, 0.06710527, 0.06710527, -0.35055053, + 0.36727629, 0.36727629, 0.69350367, 0.11268647, 0.37681301, + 1.10168386, 0.49559472, 0.49559472, 0.06193658, -0.07947103, + 0.01969434, 1.28470893, -0.13536813, -0.13536813, 0.6575966 ]), + Metropolis: np.array([ 1.62434536, 1.01258895, 0.4844172 , 0.4844172 , 0.4844172 , + 0.4844172 , 0.4844172 , 0.4844172 , 0.4844172 , 0.4844172 , + 0.31198899, 0.31198899, 0.31198899, 0.31198899, 1.21284494, + 0.52911708, 0.261229 , 0.79158447, 0.10441177, -0.74079387, + -0.74079387, -0.50637818, -0.50637818, -0.50637818, -0.45557042, + -0.45557042, -0.33541147, 0.28179164, 0.58196196, 0.22971211, + 0.02081788, 0.60744107, 0.8930284 , 0.8930284 , 1.40595822, + 1.10786538, 1.10786538, 1.10786538, 1.10786538, -0.28863095, + -0.12859388, 0.74757504, 0.74757504, 0.74757504, 0.97766977, + 0.97766977, 0.75534163, 0.55458356, 0.75288328, 0.87189193, + 0.9937132 , 0.9937132 , 0.61842825, 0.61842825, 0.27457457, + 0.31817143, 0.31817143, 0.31817143, -0.77674042, -0.60735798, + 0.13319847, -0.82050213, -0.82050213, -0.50534274, -0.15479676, + -0.15479676, -0.19349227, -0.19349227, -0.21810923, -0.21810923, + -0.21810923, 1.0180548 , -0.18121323, 0.68213209, 0.68213209, + 1.23266958, 1.23266958, 0.60913885, 1.41099989, 1.45756718, + 1.45756718, 1.45756718, 1.45756718, 1.59526839, 1.82776295, + 1.82776295, 1.82776295, 1.82776295, 2.2691274 , 2.16897216, + 2.18638157, 1.06436284, 0.54726838, 0.54726838, 1.04247971, + 0.86777655, 0.86777655, 0.86777655, 0.86777655, 0.61914177]), + NUTS: np.array([ 0.550575 , 0.550575 , 0.80046332, 0.91590059, 1.34621916, + 1.34621916, -0.63917773, -0.65770809, -0.65770809, -0.64512868, + -1.05448153, -0.5225666 , 0.14335153, -0.0034499 , -0.0034499 , + 0.05309212, -0.53186371, 0.29325825, 0.43210854, 0.56284837, + 0.56284837, 0.38041767, 0.47322034, 0.49937368, 0.49937368, + 
0.44424258, 0.44424258, -0.02790848, -0.40470145, -0.35725567, + -0.43744228, 0.41955432, 0.31099421, 0.31099421, 0.65811717, + 0.66649398, 0.38493786, 0.54114658, 0.54114658, 0.68222408, + 0.66404942, 1.44143108, 1.15638799, -0.06775775, -0.06775775, + 0.30418561, 0.23543403, 0.57934404, -0.5435111 , -0.47938915, + -0.23816662, 0.36793792, 0.36793792, 0.64980016, 0.52150456, + 0.64643321, 0.26130179, 1.10569077, 1.10569077, 1.23662797, + -0.36928735, -0.14303069, 0.85298904, 0.85298904, 0.31422085, + 0.32113762, 0.32113762, 1.0692238 , 1.0692238 , 1.60127576, + 1.49249738, 1.09065107, 0.84264371, 0.84264371, -0.08832343, + 0.04868027, -0.02679449, -0.02679449, 0.91989101, 0.65754478, + -0.39220625, 0.08379492, 1.03055634, 1.03055634, 1.71071332, + 1.58740483, 1.67905741, 0.77744868, 0.15050587, 0.15050587, + 0.73979127, 0.15445515, 0.13134717, 0.85068974, 0.85068974, + 0.6974799 , 0.16170472, 0.86405959, 0.86405959, -0.22032854]), + SMC: np.array([ 5.10950205e-02, 1.09811720e+00, 1.78330202e-01, 6.85938766e-01, + 1.42354476e-01, -1.59630758e+00, 1.57176810e+00, -4.01398917e-01, + 1.14567871e+00, 1.14954938e+00, 4.94399840e-01, 1.16253017e+00, + 1.17432244e+00, 7.79195162e-01, 1.29017945e+00, 2.53722905e-01, + 5.38589898e-01, 3.52121216e-01, 1.35795966e+00, 1.02086933e-01, + 1.58845251e+00, 6.76852927e-01, -1.04716592e-02, -1.01613324e-01, + 1.37680965e+00, 7.40036542e-01, 2.89069320e-01, 1.48153741e+00, + 9.58156958e-01, 5.73623782e-02, 7.68850721e-01, 3.68643390e-01, + 1.47645964e+00, 2.32596780e-01, -1.85008158e-01, 3.71335958e-01, + 2.68600102e+00, -4.89504443e-01, 6.54265561e-02, 3.80455349e-01, + 1.17875338e+00, 2.30233324e-01, 6.90960231e-01, 8.81668685e-01, + -2.19754340e-01, 1.27686862e-01, 3.28444250e-01, 1.34820635e-01, + 5.29725257e-01, 1.43783915e+00, -1.64754264e-01, 7.41446719e-01, + -1.17733186e+00, 6.01215658e-02, 1.82638158e-01, -2.23232214e-02, + -1.79877583e-02, 8.37949150e-01, 4.41964955e-01, -8.66524743e-01, + 4.90738093e-01, 2.42056488e-01, 4.67699626e-01, 2.91075351e-01, + 1.49541153e+00, 8.30730845e-01, 1.03956404e+00, -5.16162910e-01, + 2.84338859e-01, 1.72305888e+00, 9.52445566e-01, 1.48831718e+00, + 8.03455325e-01, 1.48840970e+00, 6.98122664e-01, 3.30187139e-01, + 7.88029712e-01, 9.31510828e-01, 1.01326878e+00, 2.26637755e-01, + 1.70703646e-01, -8.54429841e-01, 2.97254590e-01, -2.77843274e-01, + -2.25544207e-01, 1.98862826e-02, 5.05953885e-01, 4.98203941e-01, + 1.20897382e+00, -6.32958669e-05, -7.22425896e-01, 1.60930869e+00, + -5.02773645e-01, 2.46405678e+00, 9.16039706e-01, 1.14146060e+00, + -1.95781984e-01, -2.44653942e-01, 2.67851290e-01, 2.37462012e-01, + 6.71471950e-01, 1.18319765e+00, 1.29146530e+00, -3.14177753e-01, + -1.31041215e-02, 1.05029405e+00, 1.31202399e+00, 7.40532839e-02, + 9.15510041e-01, 7.71054604e-01, 9.83483263e-01, 9.03032142e-01, + 9.14191160e-01, 9.32285366e-01, 1.13937607e+00, -4.29155928e-01, + 3.44609229e-02, -5.46423555e-02, 1.34625982e+00, -1.28287047e-01, + -1.55214879e-02, 3.25294234e-01, 1.06120585e+00, -5.09891282e-01, + 1.25789335e+00, 1.01808348e+00, -9.92590713e-01, 1.72832932e+00, + 1.12232980e+00, 8.54801892e-01, 1.41534752e+00, 3.50798405e-01, + 3.69381623e-01, 1.48608411e+00, -1.15506310e-02, 1.57066360e+00, + 2.00747378e-01, 4.47219763e-01, 5.57720524e-01, -7.74295353e-02, + 1.79192501e+00, 7.66510475e-01, 1.38852488e+00, -4.06055122e-01, + 2.73203156e-01, 3.61014687e-01, 1.23574043e+00, 1.64565746e-01, + -9.89896480e-02, 9.26130265e-02, 1.06440134e+00, -1.55890408e-01, + 4.47131846e-01, -7.59186008e-01, 
                             -1.50881256e+00, -2.13928005e-01,
+                            -4.19160151e-01,  1.75815544e+00,  7.45423008e-01,  6.94781506e-01,
+                             1.58596346e+00,  1.75508724e+00,  4.56070434e-01,  2.94128709e-02,
+                             1.17703970e+00, -9.90230827e-02,  8.42796845e-01,  1.79154944e+00,
+                             5.92779197e-01,  2.73562285e-01,  1.61597907e+00,  1.23514403e+00,
+                             4.86261080e-01, -3.10434934e-01,  5.57873722e-01,  6.50365217e-01,
+                            -3.41009850e-01,  9.26851109e-01,  8.28936486e-01,  9.16180689e-02,
+                             1.30226405e+00,  3.73945789e-01,  6.04560122e-02,  6.00698708e-01,
+                             9.68764731e-02,  1.41904148e+00,  6.94182961e-03,  3.17504138e-01,
+                             5.90956041e-01, -5.78113887e-01,  5.26615565e-01, -4.19715252e-01,
+                             8.92891364e-01,  1.30207363e-01,  4.19899637e-01,  7.10275704e-01,
+                             9.27418179e-02,  1.85758044e+00,  4.76988907e-01, -1.36341398e-01]),
     }
 
     def setup_class(self):
@@ -190,14 +187,12 @@ def check_trace(self, step_method):
         n_steps = 100
         with Model() as model:
             x = Normal('x', mu=0, sd=1)
+            y = Normal('y', mu=x, sd=1, observed=1)
             if step_method.__name__ == 'SMC':
                 trace = sample(draws=200,
-                               chains=2,
-                               start=[{'x':1.}, {'x':-1.}],
                                random_seed=1,
                                progressbar=False,
-                               step=step_method(),
-                               step_kwargs={'homepath': self.temp_dir})
+                               step=step_method())
             elif step_method.__name__ == 'NUTS':
                 step = step_method(scaling=model.test_point)
                 trace = sample(0, tune=n_steps,
@@ -207,8 +202,9 @@ def check_trace(self, step_method):
                 trace = sample(0, tune=n_steps, discard_tuned_samples=False,
                                step=step_method(), random_seed=1, chains=1)
+
         assert_array_almost_equal(
-            trace.get_values('x'),
+            trace['x'],
             self.master_samples[step_method],
             decimal=select_by_precision(float64=6, float32=4))