diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 1e0f163d9a..af02c8cbc5 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -4,12 +4,16 @@ creating custom backends). """ import itertools as itl +import logging import numpy as np import warnings import theano.tensor as tt from ..model import modelcontext +from .report import SamplerReport, merge_reports + +logger = logging.getLogger('pymc3') class BackendError(Exception): @@ -61,6 +65,10 @@ def __init__(self, name, model=None, vars=None, test_point=None): self.chain = None self._is_base_setup = False self.sampler_vars = None + self._warnings = [] + + def _add_warnings(self, warnings): + self._warnings.extend(warnings) # Sampling methods @@ -174,7 +182,7 @@ def get_sampler_stats(self, varname, sampler_idx=None, burn=0, thin=1): return self._get_sampler_stats(varname, sampler_idx, burn, thin) sampler_idxs = [i for i, s in enumerate(self.sampler_vars) - if varname in s] + if varname in s] if not sampler_idxs: raise KeyError("Unknown sampler stat %s" % varname) @@ -185,20 +193,19 @@ def get_sampler_stats(self, varname, sampler_idx=None, burn=0, thin=1): else: return vals - def _get_sampler_stats(self, varname, sampler_idx, burn, thin): """Get sampler statistics.""" raise NotImplementedError() def _slice(self, idx): """Slice trace object.""" - raise NotImplementedError + raise NotImplementedError() def point(self, idx): """Return dictionary of point values at `idx` for current chain with variables names as keys. """ - raise NotImplementedError + raise NotImplementedError() @property def stat_names(self): @@ -258,6 +265,11 @@ def __init__(self, straces): raise ValueError("Chains are not unique.") self._straces[strace.chain] = strace + self._report = SamplerReport() + for strace in straces: + if hasattr(strace, '_warnings'): + self._report._add_warnings(strace._warnings, strace.chain) + def __repr__(self): template = '<{}: {} chains, {} iterations, {} variables>' return template.format(self.__class__.__name__, @@ -271,6 +283,10 @@ def nchains(self): def chains(self): return list(sorted(self._straces.keys())) + @property + def report(self): + return self._report + def __getitem__(self, idx): if isinstance(idx, slice): return self._slice(idx) @@ -303,7 +319,7 @@ def __getitem__(self, idx): raise KeyError("Unknown variable %s" % var) _attrs = set(['_straces', 'varnames', 'chains', 'stat_names', - 'supports_sampler_stats']) + 'supports_sampler_stats', '_report']) def __getattr__(self, name): # Avoid infinite recursion when called before __init__ @@ -447,10 +463,13 @@ def get_sampler_stats(self, varname, burn=0, thin=1, combine=True, for chain in chains] return _squeeze_cat(results, combine, squeeze) - def _slice(self, idx): - """Return a new MultiTrace object sliced according to `idx`.""" - new_traces = [trace._slice(idx) for trace in self._straces.values()] - return MultiTrace(new_traces) + def _slice(self, slice): + """Return a new MultiTrace object sliced according to `slice`.""" + new_traces = [trace._slice(slice) for trace in self._straces.values()] + trace = MultiTrace(new_traces) + idxs = slice.indices(len(self)) + trace._report = self._report._slice(*idxs) + return trace def point(self, idx, chain=None): """Return a dictionary of point values at `idx`. 
@@ -502,6 +521,7 @@ def merge_traces(mtraces): if new_chain in base_mtrace._straces: raise ValueError("Chains are not unique.") base_mtrace._straces[new_chain] = strace + base_mtrace.report = merge_reports([trace.report for trace in mtraces]) return base_mtrace diff --git a/pymc3/backends/ndarray.py b/pymc3/backends/ndarray.py index 9c07060c40..700722a787 100644 --- a/pymc3/backends/ndarray.py +++ b/pymc3/backends/ndarray.py @@ -116,8 +116,9 @@ def close(self): self.samples = {var: vtrace[:self.draw_idx] for var, vtrace in self.samples.items()} if self._stats is not None: - self._stats = [{var: trace[:self.draw_idx] for var, trace in stats.items()} - for stats in self._stats] + self._stats = [ + {var: trace[:self.draw_idx] for var, trace in stats.items()} + for stats in self._stats] # Selection methods diff --git a/pymc3/backends/report.py b/pymc3/backends/report.py new file mode 100644 index 0000000000..52ca87590f --- /dev/null +++ b/pymc3/backends/report.py @@ -0,0 +1,162 @@ +from collections import namedtuple +import logging +import enum + + +logger = logging.getLogger('pymc3') + + +@enum.unique +class WarningType(enum.Enum): + # For HMC and NUTS + DIVERGENCE = 1 + TUNING_DIVERGENCE = 2 + DIVERGENCES = 3 + TREEDEPTH = 4 + # Problematic sampler parameters + BAD_PARAMS = 5 + # Indications that chains did not converge, eg Rhat + CONVERGENCE = 6 + BAD_ACCEPTANCE = 7 + + +SamplerWarning = namedtuple( + 'SamplerWarning', + "kind, message, level, step, exec_info, extra") + + +_LEVELS = { + 'info': logging.INFO, + 'error': logging.ERROR, + 'warn': logging.WARN, + 'debug': logging.DEBUG, + 'critical': logging.CRITICAL, +} + + +class SamplerReport(object): + def __init__(self): + self._chain_warnings = {} + self._global_warnings = [] + self._effective_n = None + self._gelman_rubin = None + + @property + def _warnings(self): + chains = sum(self._chain_warnings.values(), []) + return chains + self._global_warnings + + @property + def ok(self): + """Whether the automatic convergence checks found serious problems.""" + return all(_LEVELS[warn.level] < _LEVELS['warn'] + for warn in self._warnings) + + def raise_ok(self, level='error'): + errors = [warn for warn in self._warnings + if _LEVELS[warn.level] >= _LEVELS[level]] + if errors: + raise ValueError('Serious convergence issues during sampling.') + + def _run_convergence_checks(self, trace): + if trace.nchains == 1: + msg = ("Only one chain was sampled, this makes it impossible to " + "run some convergence checks") + warn = SamplerWarning(WarningType.BAD_PARAMS, msg, 'info', + None, None, None) + self._add_warnings([warn]) + return + + from pymc3 import diagnostics + + self._effective_n = effective_n = diagnostics.effective_n(trace) + self._gelman_rubin = gelman_rubin = diagnostics.gelman_rubin(trace) + + warnings = [] + rhat_max = max(val.max() for val in gelman_rubin.values()) + if rhat_max > 1.4: + msg = ("The gelman-rubin statistic is larger than 1.4 for some " + "parameters. The sampler did not converge.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'error', None, None, gelman_rubin) + warnings.append(warn) + elif rhat_max > 1.2: + msg = ("The gelman-rubin statistic is larger than 1.2 for some " + "parameters.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'warn', None, None, gelman_rubin) + warnings.append(warn) + elif rhat_max > 1.05: + msg = ("The gelman-rubin statistic is larger than 1.05 for some " + "parameters. 
This indicates slight problems during " + "sampling.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'info', None, None, gelman_rubin) + warnings.append(warn) + + eff_min = min(val.min() for val in effective_n.values()) + n_samples = len(trace) * trace.nchains + if eff_min < 200 and n_samples >= 500: + msg = ("The estimated number of effective samples is smaller than " + "200 for some parameters.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'error', None, None, effective_n) + warnings.append(warn) + elif eff_min / n_samples < 0.25: + msg = ("The number of effective samples is smaller than " + "25% for some parameters.") + warn = SamplerWarning( + WarningType.CONVERGENCE, msg, 'warn', None, None, effective_n) + warnings.append(warn) + + self._add_warnings(warnings) + + def _add_warnings(self, warnings, chain=None): + if chain is None: + warn_list = self._global_warnings + else: + warn_list = self._chain_warnings.setdefault(chain, []) + warn_list.extend(warnings) + + def _log_summary(self): + + def log_warning(warn): + level = _LEVELS[warn.level] + logger.log(level, warn.message) + + for chain, warns in self._chain_warnings.items(): + for warn in warns: + log_warning(warn) + for warn in self._global_warnings: + log_warning(warn) + + def _slice(self, start, stop, step): + report = SamplerReport() + + def filter_warns(warnings): + filtered = [] + for warn in warnings: + if warn.step is None: + filtered.append(warn) + elif (start <= warn.step < stop and + (warn.step - start) % step == 0): + warn = warn._replace(step=warn.step - start) + filtered.append(warn) + return filtered + + report._add_warnings(filter_warns(self._global_warnings)) + for chain in self._chain_warnings: + report._add_warnings( + filter_warns(self._chain_warnings[chain]), + chain) + + return report + + +def merge_reports(reports): + report = SamplerReport() + for rep in reports: + report._add_warnings(rep._global_warnings) + for chain in rep._chain_warnings: + report._add_warnings(rep._chain_warnings[chain], chain) + return report diff --git a/pymc3/diagnostics.py b/pymc3/diagnostics.py index 322108be37..64188deb43 100644 --- a/pymc3/diagnostics.py +++ b/pymc3/diagnostics.py @@ -250,8 +250,8 @@ def get_neff(x, Vhat): if t % 2: t -= 1 - return min(num_chains * num_samples, - int(num_chains * num_samples / (1. + 2 * rho[1:t-1].sum()))) + neff = num_chains * num_samples / (1. 
+ 2 * rho[1:t-1].sum()) + return min(num_chains * num_samples, np.floor(neff)) def generate_neff(trace_values): x = np.array(trace_values) diff --git a/pymc3/model.py b/pymc3/model.py index cf9c3987ae..a834698fe3 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -10,7 +10,7 @@ from theano import theano, tensor as tt from theano.tensor.var import TensorVariable -from pymc3.theanof import set_theano_conf +from pymc3.theanof import set_theano_conf, floatX import pymc3 as pm from pymc3.math import flatten_list from .memoize import memoize, WithMemoization @@ -1061,13 +1061,13 @@ def _get_scaling(total_size, shape, ndim): scalar """ if total_size is None: - coef = pm.floatX(1) + coef = floatX(1) elif isinstance(total_size, int): if ndim >= 1: denom = shape[0] else: denom = 1 - coef = pm.floatX(total_size) / pm.floatX(denom) + coef = floatX(total_size) / floatX(denom) elif isinstance(total_size, (list, tuple)): if not all(isinstance(i, int) for i in total_size if (i is not Ellipsis and i is not None)): raise TypeError('Unrecognized `total_size` type, expected ' @@ -1085,20 +1085,20 @@ def _get_scaling(total_size, shape, ndim): raise ValueError('Length of `total_size` is too big, ' 'number of scalings is bigger that ndim, got %r' % total_size) elif (len(begin) + len(end)) == 0: - return pm.floatX(1) + return floatX(1) if len(end) > 0: shp_end = shape[-len(end):] else: shp_end = np.asarray([]) shp_begin = shape[:len(begin)] - begin_coef = [pm.floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None] - end_coef = [pm.floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None] + begin_coef = [floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None] + end_coef = [floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None] coefs = begin_coef + end_coef coef = tt.prod(coefs) else: raise TypeError('Unrecognized `total_size` type, expected ' 'int or list of ints, got %r' % total_size) - return tt.as_tensor(pm.floatX(coef)) + return tt.as_tensor(floatX(coef)) class FreeRV(Factor, TensorVariable): diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 71a4947a0b..2d0d2771f8 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -1,23 +1,25 @@ from collections import defaultdict, Iterable from copy import copy import pickle -from six import integer_types +import logging +import warnings +from six import integer_types from joblib import Parallel, delayed import numpy as np -import warnings import theano.gradient as tg -import pymc3 as pm from .backends.base import BaseTrace, MultiTrace from .backends.ndarray import NDArray -from .model import modelcontext, Point +from .model import modelcontext, Point, all_continuous from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, Slice, CompoundStep, arraystep) from .util import update_start_vals from .vartypes import discrete_types from pymc3.step_methods.hmc import quadpotential +from pymc3 import plots +import pymc3 as pm from tqdm import tqdm import sys @@ -29,6 +31,9 @@ BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis) +_log = logging.getLogger('pymc3') + + def instantiate_steppers(model, steps, selected_steps, step_kwargs=None): """Instantiates steppers assigned to the model variables. 
@@ -144,17 +149,17 @@ def assign_step_methods(model, step=None, methods=STEP_METHODS, return instantiate_steppers(model, steps, selected_steps, step_kwargs) -def print_step_hierarchy(s, level=0): +def _print_step_hierarchy(s, level=0): if isinstance(s, (list, tuple)): - pm._log.info('>' * level + 'list') + _log.info('>' * level + 'list') for i in s: - print_step_hierarchy(i, level+1) + _print_step_hierarchy(i, level+1) elif isinstance(s, CompoundStep): - pm._log.info('>' * level + 'CompoundStep') + _log.info('>' * level + 'CompoundStep') for i in s.methods: - print_step_hierarchy(i, level+1) + _print_step_hierarchy(i, level+1) else: - pm._log.info('>' * level + '{}: {}'.format(s.__class__.__name__, s.vars)) + _log.info('>' * level + '{}: {}'.format(s.__class__.__name__, s.vars)) def _cpu_count(): @@ -182,7 +187,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=None, chain_idx=0, chains=None, njobs=None, tune=500, nuts_kwargs=None, step_kwargs=None, progressbar=True, model=None, random_seed=None, live_plot=False, discard_tuned_samples=True, - live_plot_kwargs=None, **kwargs): + live_plot_kwargs=None, compute_convergence_checks=True, **kwargs): """Draw samples from the posterior using the given step methods. Multiple step methods are supported via compound step methods. @@ -288,6 +293,9 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, Options for traceplot. Example: live_plot_kwargs={'varnames': ['x']} discard_tuned_samples : bool Whether to discard posterior samples of the tune interval. + compute_convergence_checks : bool, default=True + Whether to compute sampler statistics like gelman-rubin and + effective_n. Returns ------- @@ -331,12 +339,14 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, np.random.seed(random_seed) random_seed = [np.random.randint(2 ** 30) for _ in range(chains)] if not isinstance(random_seed, Iterable): - raise TypeError('Invalid value for `random_seed`. Must be tuple, list or int') + raise TypeError( + 'Invalid value for `random_seed`. Must be tuple, list or int') if 'chain' in kwargs: chain_idx = kwargs['chain'] - warnings.warn("The chain argument has been deprecated. Use chain_idx instead.", - DeprecationWarning) + warnings.warn( + "The chain argument has been deprecated. Use chain_idx instead.", + DeprecationWarning) if start is not None: for start_vals in start: @@ -352,10 +362,10 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, if model.ndim == 0: raise ValueError('The model does not contain any free variables.') - if step is None and init is not None and pm.model.all_continuous(model.vars): + if step is None and init is not None and all_continuous(model.vars): try: # By default, try to use NUTS - pm._log.info('Auto-assigning NUTS sampler...') + _log.info('Auto-assigning NUTS sampler...') args = step_kwargs if step_kwargs is not None else {} args = args.get('nuts', {}) start_, step = init_nuts(init=init, chains=chains, n_init=n_init, @@ -365,9 +375,9 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, start = start_ except (AttributeError, NotImplementedError, tg.NullTypeGradError): # gradient computation failed - pm._log.info("Initializing NUTS failed. " - "Falling back to elementwise auto-assignment.") - pm._log.debug('Exception in init nuts', exec_info=True) + _log.info("Initializing NUTS failed. 
" + "Falling back to elementwise auto-assignment.") + _log.debug('Exception in init nuts', exec_info=True) step = assign_step_methods(model, step, step_kwargs=step_kwargs) else: step = assign_step_methods(model, step, step_kwargs=step_kwargs) @@ -403,33 +413,39 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, ]) parallel = njobs > 1 and chains > 1 and not has_population_samplers if parallel: - pm._log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs)) - print_step_hierarchy(step) + _log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs)) + _print_step_hierarchy(step) try: trace = _mp_sample(**sample_args) except pickle.PickleError: - pm._log.warn("Could not pickle model, sampling singlethreaded.") - pm._log.debug('Pickling error:', exec_info=True) + _log.warn("Could not pickle model, sampling singlethreaded.") + _log.debug('Pickling error:', exec_info=True) parallel = False except AttributeError as e: if str(e).startswith("AttributeError: Can't pickle"): - pm._log.warn("Could not pickle model, sampling singlethreaded.") - pm._log.debug('Pickling error:', exec_info=True) + _log.warn("Could not pickle model, sampling singlethreaded.") + _log.debug('Pickling error:', exec_info=True) parallel = False else: raise if not parallel: - if has_population_samplers: - pm._log.info('Population sampling ({} chains)'.format(chains)) - print_step_hierarchy(step) + if has_population_samplers: + _log.info('Population sampling ({} chains)'.format(chains)) + _print_step_hierarchy(step) trace = _sample_population(**sample_args) - else: - pm._log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) - print_step_hierarchy(step) + else: + _log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) + _print_step_hierarchy(step) trace = _sample_many(**sample_args) discard = tune if discard_tuned_samples else 0 - return trace[discard:] + trace = trace[discard:] + + if compute_convergence_checks: + trace.report._run_convergence_checks(trace) + + trace.report._log_summary() + return trace def _check_start_shape(model, start): @@ -459,11 +475,11 @@ def _check_start_shape(model, start): raise ValueError("Bad shape for start argument:{}".format(e)) -def _sample_many(draws, chain, chains, start, random_seed, **kwargs): +def _sample_many(draws, chain, chains, start, random_seed, step, **kwargs): traces = [] for i in range(chains): trace = _sample(draws=draws, chain=chain + i, start=start[i], - random_seed=random_seed[i], **kwargs) + step=step, random_seed=random_seed[i], **kwargs) if trace is None: if len(traces) == 0: raise ValueError('Sampling stopped before a sample was created.') @@ -514,9 +530,9 @@ def _sample(chain, progressbar, random_seed, start, draws=None, step=None, if it >= skip_first: trace = MultiTrace([strace]) if it == skip_first: - ax = pm.plots.traceplot(trace, live_plot=False, **live_plot_kwargs) + ax = plots.traceplot(trace, live_plot=False, **live_plot_kwargs) elif (it - skip_first) % refresh_every == 0 or it == draws - 1: - pm.plots.traceplot(trace, ax=ax, live_plot=True, **live_plot_kwargs) + plots.traceplot(trace, ax=ax, live_plot=True, **live_plot_kwargs) except KeyboardInterrupt: pass finally: @@ -575,7 +591,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, if random_seed is not None: np.random.seed(random_seed) if draws < 1: - raise ValueError('Argument `draws` should be above 0.') + raise ValueError('Argument `draws` must be greater than 0.') if start is None: start = {} @@ 
-600,6 +616,7 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, strace.setup(draws, chain) try: + step.tune = bool(tune) for i in range(draws): if i == tune: step = stop_tuning(step) @@ -615,16 +632,18 @@ def _iter_sample(draws, step, start=None, trace=None, chain=0, tune=None, yield strace except KeyboardInterrupt: strace.close() - if hasattr(step, 'report'): - step.report._finalize(strace) + if hasattr(step, 'warnings'): + warns = step.warnings(strace) + strace._add_warnings(warns) raise except BaseException: strace.close() raise else: strace.close() - if hasattr(step, 'report'): - step.report._finalize(strace) + if hasattr(step, 'warnings'): + warns = step.warnings(strace) + strace._add_warnings(warns) class PopulationStepper(object): @@ -651,7 +670,7 @@ def __init__(self, steppers, parallelize): if parallelize and sys.version_info >= (3,4): try: # configure a child process for each stepper - pm._log.info('Attempting to parallelize chains.') + _log.info('Attempting to parallelize chains.') import multiprocessing for c, stepper in enumerate(tqdm(steppers)): slave_end, master_end = multiprocessing.Pipe() @@ -670,16 +689,19 @@ def __init__(self, steppers, parallelize): self._master_ends.append(master_end) self._processes.append(process) self.is_parallelized = True - except: - pm._log.info('Population parallelization failed. ' \ - 'Falling back to sequential stepping of chains.') + except Exception: + _log.info('Population parallelization failed. ' + 'Falling back to sequential stepping of chains.') + _log.debug('Error was: ', exec_info=True) else: if parallelize: - warnings.warn('Population parallelization is only supported on Python 3.4 and ' \ - 'higher. All {} chains will step on one process.'.format(self.nchains)) + warnings.warn('Population parallelization is only supported ' + 'on Python 3.4 and higher. All {} chains will ' + 'run sequentially on one process.' + .format(self.nchains)) else: - pm._log.info('Chains are not parallelized. You can enable this by passing ' \ - 'pm.sample(parallelize=True).') + _log.info('Chains are not parallelized. 
You can enable this by passing ' + 'pm.sample(parallelize=True).') return super(PopulationStepper, self).__init__() def __enter__(self): @@ -693,8 +715,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): master_end.send(None) for process in self._processes: process.join(timeout=3) - except: - pm._log.warning('Termination failed.') + except Exception: + _log.warning('Termination failed.') return @staticmethod @@ -737,7 +759,7 @@ def _run_slave(c, stepper_dumps, slave_end): update = stepper.step(population[c]) slave_end.send(update) except Exception: - pm._log.exception('ChainWalker{}'.format(c)) + _log.exception('ChainWalker{}'.format(c)) return def step(self, tune_stop, population): @@ -990,7 +1012,7 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None, np.random.seed(random_seed) - indices = np.random.randint(0, nchain*len_trace, samples) + indices = np.random.randint(0, nchain * len_trace, samples) if progressbar: indices = tqdm(indices, total=samples) @@ -1077,7 +1099,7 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None, weights = np.asarray(weights) p = weights / np.sum(weights) - min_tr = min([len(i)*i.nchains for i in traces]) + min_tr = min([len(i) * i.nchains for i in traces]) n = (min_tr * p).astype('int') # ensure n sum up to min_tr @@ -1092,7 +1114,7 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None, except AttributeError: nchain = 1 - indices = np.random.randint(0, nchain*len_trace, j) + indices = np.random.randint(0, nchain * len_trace, j) if nchain > 1: chain_idx, point_idx = np.divmod(indices, len_trace) for idx in zip(chain_idx, point_idx): @@ -1104,7 +1126,8 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None, obs = [x for m in models for x in m.observed_RVs] variables = np.repeat(obs, n) - lenghts = list(set([np.shape(np.atleast_1d(o.distribution.default())) for o in obs])) + lengths = [np.atleast_1d(observed).shape for observed in obs] + lenghts = list(set(lengths)) if len(lenghts) == 1: size = [None for i in variables] @@ -1116,8 +1139,8 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None, y = np.zeros(shape=lenghts[1]) b = np.broadcast(x, y) for var in variables: - l = np.shape(np.atleast_1d(var.distribution.default())) - if l != b.shape: + shape = np.shape(np.atleast_1d(var.distribution.default())) + if shape != b.shape: size.append(b.shape) else: size.append(None) @@ -1170,8 +1193,8 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, a diagonal based on the variance of the tuning samples. All chains use the test value (usually the prior mean) as starting point. - * jitter+adapt_diag : Same as `adapt_diag`, but add uniform jitter - in [-1, 1] to the starting point in each chain. + * jitter+adapt_diag : Same as `adapt_diag`, but use uniform jitter + in [-1, 1] as starting point in each chain. * advi+adapt_diag : Run ADVI and then adapt the resulting diagonal mass matrix based on the sample variance of the tuning samples. 
* advi+adapt_diag_grad : Run ADVI and then adapt the resulting @@ -1202,12 +1225,12 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, nuts_sampler : pymc3.step_methods.NUTS Instantiated and initialized NUTS sampler object """ - model = pm.modelcontext(model) + model = modelcontext(model) vars = kwargs.get('vars', model.vars) if set(vars) != set(model.vars): raise ValueError('Must use init_nuts on all variables of a model.') - if not pm.model.all_continuous(vars): + if not all_continuous(vars): raise ValueError('init_nuts can only be used for models with only ' 'continuous variables.') @@ -1220,7 +1243,7 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, if init == 'auto': init = 'jitter+adapt_diag' - pm._log.info('Initializing NUTS using {}...'.format(init)) + _log.info('Initializing NUTS using {}...'.format(init)) if random_seed is not None: random_seed = int(np.atleast_1d(random_seed)[0]) @@ -1325,7 +1348,8 @@ def init_nuts(init='auto', chains=1, n_init=500000, model=None, start = list(np.random.choice(init_trace, chains)) potential = quadpotential.QuadPotentialFull(cov) else: - raise NotImplementedError('Initializer {} is not supported.'.format(init)) + raise ValueError( + 'Unknown initializer: {}.'.format(init)) step = pm.NUTS(potential=potential, model=model, **kwargs) diff --git a/pymc3/stats.py b/pymc3/stats.py index 3aae537ed4..1c897d6603 100644 --- a/pymc3/stats.py +++ b/pymc3/stats.py @@ -15,7 +15,6 @@ from scipy.stats import dirichlet from scipy.optimize import minimize -from .backends import tracetab as ttab __all__ = ['autocorr', 'autocov', 'dic', 'bpic', 'waic', 'loo', 'hpd', 'quantiles', 'mc_error', 'summary', 'df_summary', 'compare', 'bfmi', 'r2_score'] @@ -865,6 +864,7 @@ def dict2pd(statdict, labelname): """Small helper function to transform a diagnostics output dict into a pandas Series. 
""" + from .backends import tracetab as ttab var_dfs = [] for key, value in statdict.items(): var_df = pd.Series(value.flatten()) @@ -964,6 +964,7 @@ def summary(trace, varnames=None, transform=lambda x: x, stat_funcs=None, mu__0 0.066473 0.000312 0.105039 0.214242 mu__1 0.067513 -0.159097 -0.045637 0.062912 """ + from .backends import tracetab as ttab if varnames is None: varnames = get_default_varnames(trace.varnames, diff --git a/pymc3/step_methods/compound.py b/pymc3/step_methods/compound.py index 0dd55b3492..d8f66171cb 100644 --- a/pymc3/step_methods/compound.py +++ b/pymc3/step_methods/compound.py @@ -31,19 +31,9 @@ def step(self, point): point = method.step(point) return point - @property - def report(self): - reports = [] + def warnings(self, strace): + warns = [] for method in self.methods: - if hasattr(method, 'report'): - reports.append(method.report) - return _CompoundReport(reports) - - -class _CompoundReport(object): - def __init__(self, reports): - self._reports = reports - - def _finalize(self, strace): - for report in self._reports: - report._finalize(strace) + if hasattr(method, 'warnings'): + warns.extend(method.warnings(strace)) + return warns diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py index 140495d25b..aebb401d34 100644 --- a/pymc3/step_methods/hmc/base_hmc.py +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -1,3 +1,5 @@ +from collections import namedtuple + import numpy as np from pymc3.model import modelcontext, Point @@ -6,6 +8,18 @@ from pymc3.theanof import inputvars, floatX from pymc3.tuning import guess_scaling from .quadpotential import quad_potential, QuadPotentialDiagAdapt +from pymc3.step_methods import step_sizes +from pymc3.backends.report import SamplerWarning, WarningType + + +HMCStepData = namedtuple( + "HMCStepData", + "end, accept_stat, divergence_info, stats") + + +DivergenceInfo = namedtuple( + 'DivergenceInfo', + 'message, exec_info, state') class BaseHMC(arraystep.GradientSharedStep): @@ -15,7 +29,10 @@ class BaseHMC(arraystep.GradientSharedStep): def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, model=None, blocked=True, potential=None, - integrator="leapfrog", dtype=None, **theano_kwargs): + integrator="leapfrog", dtype=None, Emax=1000, + target_accept=0.8, gamma=0.05, k=0.75, t0=10, + adapt_step_size=True, step_rand=None, + **theano_kwargs): """Set up Hamiltonian samplers with common structures. 
Parameters @@ -45,8 +62,18 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, super(BaseHMC, self).__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs) + self.adapt_step_size = adapt_step_size + self.Emax = Emax + self.iter_count = 0 size = self._logp_dlogp_func.size + self.step_size = step_scale / (size ** 0.25) + self.target_accept = target_accept + self.step_adapt = step_sizes.DualAverageAdaptation( + self.step_size, target_accept, gamma, k, t0) + + self.tune = True + if scaling is None and potential is None: mean = floatX(np.zeros(size)) var = floatX(np.ones(size)) @@ -59,10 +86,114 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, if scaling is not None and potential is not None: raise ValueError("Can not specify both potential and scaling.") - self.step_size = step_scale / (size ** 0.25) if potential is not None: self.potential = potential else: self.potential = quad_potential(scaling, is_cov) - self.integrator = integration.CpuLeapfrogIntegrator(self.potential, self._logp_dlogp_func) + self.integrator = integration.CpuLeapfrogIntegrator( + self.potential, self._logp_dlogp_func) + + self._step_rand = step_rand + self._warnings = [] + self._samples_after_tune = 0 + self._num_divs_sample = 0 + + def _hamiltonian_step(self, start, p0, step_size): + """Compute one hamiltonian trajectory and return the next state. + + Subclasses must overwrite this method and return a `HMCStepData`. + """ + raise NotImplementedError("Abstract method") + + def astep(self, q0): + """Perform a single HMC iteration.""" + p0 = self.potential.random() + start = self.integrator.compute_state(q0, p0) + + if not np.isfinite(start.energy): + self.potential.raise_ok() + raise ValueError('Bad initial energy: %s. The model ' + 'might be misspecified.' % start.energy) + + adapt_step = self.tune and self.adapt_step_size + step_size = self.step_adapt.current(adapt_step) + self.step_size = step_size + + if self._step_rand is not None: + step_size = self._step_rand(step_size) + + hmc_step = self._hamiltonian_step(start, p0, step_size) + + self.step_adapt.update(hmc_step.accept_stat, adapt_step) + self.potential.update(hmc_step.end.q, hmc_step.end.q_grad, self.tune) + if hmc_step.divergence_info: + info = hmc_step.divergence_info + if self.tune: + kind = WarningType.TUNING_DIVERGENCE + point = None + else: + kind = WarningType.DIVERGENCE + self._num_divs_sample += 1 + # We don't want to fill up all memory with divergence info + if self._num_divs_sample < 100: + point = self._logp_dlogp_func.array_to_dict(info.state.q) + else: + point = None + warning = SamplerWarning( + kind, info.message, 'debug', self.iter_count, + info.exec_info, point) + + self._warnings.append(warning) + + self.iter_count += 1 + if not self.tune: + self._samples_after_tune += 1 + + stats = { + 'tune': self.tune, + 'diverging': bool(hmc_step.divergence_info), + } + + stats.update(hmc_step.stats) + stats.update(self.step_adapt.stats()) + + return hmc_step.end.q, [stats] + + def reset(self, start=None): + self.tune = True + self.potential.reset() + + def warnings(self, strace): + # list.copy() is not available in python2 + warnings = self._warnings[:] + + # Generate a global warning for divergences + n_divs = self._num_divs_sample + if n_divs and self._samples_after_tune == n_divs: + msg = ('The chain contains only diverging samples. 
The model is ' + 'probably misspecified.') + warning = SamplerWarning( + WarningType.DIVERGENCES, msg, 'error', None, None, None) + warnings.append(warning) + elif n_divs > 0: + message = ('Divergences after tuning. Increase `target_accept` or ' + 'reparameterize.') + warning = SamplerWarning( + WarningType.DIVERGENCES, message, 'error', None, None, None) + warnings.append(warning) + + # small trace + if self._samples_after_tune == 0: + msg = "Tuning was enabled throughout the whole trace." + warning = SamplerWarning( + WarningType.BAD_PARAMS, msg, 'error', None, None, None) + warnings.append(warning) + elif self._samples_after_tune < 500: + msg = "Only %s samples in chain." % self._samples_after_tune + warning = SamplerWarning( + WarningType.BAD_PARAMS, msg, 'error', None, None, None) + warnings.append(warning) + + warnings.extend(self.step_adapt.warnings()) + return warnings diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py index f9c3441ae8..dd5c05ab12 100644 --- a/pymc3/step_methods/hmc/hmc.py +++ b/pymc3/step_methods/hmc/hmc.py @@ -1,17 +1,15 @@ +import math + import numpy as np -from ..arraystep import metrop_select, Competence -from .base_hmc import BaseHMC +from ..arraystep import Competence from pymc3.vartypes import discrete_types -from pymc3.theanof import floatX +from pymc3.step_methods.hmc.integration import IntegrationError +from pymc3.step_methods.hmc.base_hmc import BaseHMC, HMCStepData, DivergenceInfo __all__ = ['HamiltonianMC'] -# TODO: -# add constraint handling via page 37 of Radford's -# http://www.cs.utoronto.ca/~radford/ham-mcmc.abstract.html - def unif(step_size, elow=.85, ehigh=1.15): return np.random.uniform(elow, ehigh) * step_size @@ -25,8 +23,24 @@ class HamiltonianMC(BaseHMC): name = 'hmc' default_blocked = True - - def __init__(self, vars=None, path_length=2., step_rand=unif, **kwargs): + generates_stats = True + stats_dtypes = [{ + 'step_size': np.float64, + 'n_steps': np.int64, + 'tune': np.bool, + 'step_size_bar': np.float64, + 'accept': np.float64, + 'diverging': np.bool, + 'energy_error': np.float64, + 'energy': np.float64, + 'max_energy_error': np.float64, + 'path_length': np.float64, + 'accepted': np.bool, + }] + + def __init__(self, vars=None, path_length=2., + adapt_step_size=True, gamma=0.05, k=0.75, t0=10, + target_accept=0.8, **kwargs): """Set up the Hamiltonian Monte Carlo sampler. Parameters @@ -50,32 +64,71 @@ def __init__(self, vars=None, path_length=2., step_rand=unif, **kwargs): An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. It can be specified instead of the scaling matrix. + target_accept : float, default .8 + Adapt the step size such that the average acceptance + probability across the trajectories are close to target_accept. + Higher values for target_accept lead to smaller step sizes. + Setting this to higher values like 0.9 or 0.99 can help + with sampling from difficult posteriors. Valid values are + between 0 and 1 (exclusive). + gamma : float, default .05 + k : float, default .75 + Parameter for dual averaging for step size adaptation. Values + between 0.5 and 1 (exclusive) are admissible. Higher values + correspond to slower adaptation. + t0 : int, default 10 + Parameter for dual averaging. Higher values slow initial + adaptation. + adapt_step_size : bool, default=True + Whether step size adaptation should be enabled. If this is + disabled, `k`, `t0`, `gamma` and `target_accept` are ignored. 
model : pymc3.Model The model **kwargs : passed to BaseHMC """ super(HamiltonianMC, self).__init__(vars, **kwargs) self.path_length = path_length - self.step_rand = step_rand - def astep(self, q0): - """Perform a single HMC iteration.""" - e = floatX(self.step_rand(self.step_size)) - n_steps = int(self.path_length / e) - - p0 = self.potential.random() - start = self.integrator.compute_state(q0, p0) - - if not np.isfinite(start.energy): - raise ValueError('Bad initial energy: %s. The model ' - 'might be misspecified.' % start.energy) + def _hamiltonian_step(self, start, p0, step_size): + path_length = np.random.rand() * self.path_length + n_steps = max(1, int(path_length / step_size)) + energy_change = -np.inf state = start - for _ in range(n_steps): - state = self.integrator.step(e, state) - - energy_change = start.energy - state.energy - return metrop_select(energy_change, state.q, start.q)[0] + div_info = None + try: + for _ in range(n_steps): + state = self.integrator.step(step_size, state) + except IntegrationError as e: + div_info = DivergenceInfo('Divergence encountered.', e, state) + else: + if not np.isfinite(state.energy): + div_info = DivergenceInfo( + 'Divergence encountered, bad energy.', None, state) + energy_change = start.energy - state.energy + if np.abs(energy_change) > self.Emax: + div_info = DivergenceInfo( + 'Divergence encountered, large integration error.', + None, state) + + accept_stat = min(1, math.exp(energy_change)) + + if div_info is not None or np.random.rand() >= accept_stat: + end = start + accepted = False + else: + end = state + accepted = True + + stats = { + 'path_length': path_length, + 'n_steps': n_steps, + 'accept': accept_stat, + 'energy_error': energy_change, + 'energy': state.energy, + 'accepted': accepted, + } + return HMCStepData(end, accept_stat, div_info, stats) @staticmethod def competence(var, has_grad): diff --git a/pymc3/step_methods/hmc/integration.py b/pymc3/step_methods/hmc/integration.py index 9ed3f4b8b9..4744d1c046 100644 --- a/pymc3/step_methods/hmc/integration.py +++ b/pymc3/step_methods/hmc/integration.py @@ -7,6 +7,10 @@ State = namedtuple("State", 'q, p, v, q_grad, energy') +class IntegrationError(RuntimeError): + pass + + class CpuLeapfrogIntegrator(object): def __init__(self, potential, logp_dlogp_func): """Leapfrog integrator using CPU.""" @@ -46,6 +50,21 @@ def step(self, epsilon, state, out=None): ------- None if `out` is provided, else a State namedtuple """ + try: + return self._step(epsilon, state, out=None) + except linalg.LinAlgError as err: + msg = "LinAlgError during leapfrog step." + raise IntegrationError(msg) + except ValueError as err: + # Raised by many scipy.linalg functions + scipy_msg = "array must not contain infs or nans" + if len(err.args) > 0 and scipy_msg in err.args[0].lower(): + msg = "Infs or nans in scipy.linalg during leapfrog step." 
+ raise IntegrationError(msg) + else: + raise + + def _step(self, epsilon, state, out=None): pot = self._potential axpy = linalg.blas.get_blas_funcs('axpy', dtype=self._dtype) diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index 8102cce81a..6a5815743f 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -1,14 +1,14 @@ +from __future__ import division + from collections import namedtuple -import warnings import numpy as np import numpy.random as nr -from scipy import stats, linalg -import six from ..arraystep import Competence -from pymc3.exceptions import SamplingError -from .base_hmc import BaseHMC +from .base_hmc import BaseHMC, HMCStepData, DivergenceInfo +from .integration import IntegrationError +from pymc3.backends.report import SamplerWarning, WarningType from pymc3.theanof import floatX from pymc3.vartypes import continuous_types @@ -67,8 +67,8 @@ class NUTS(BaseHMC): References ---------- - .. [1] Hoffman, Matthew D., & Gelman, Andrew. (2011). The No-U-Turn Sampler: - Adaptively Setting Path Lengths in Hamiltonian Monte Carlo. + .. [1] Hoffman, Matthew D., & Gelman, Andrew. (2011). The No-U-Turn + Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo. """ name = 'nuts' @@ -88,10 +88,7 @@ class NUTS(BaseHMC): 'max_energy_error': np.float64, }] - def __init__(self, vars=None, Emax=1000, target_accept=0.8, - gamma=0.05, k=0.75, t0=10, adapt_step_size=True, - max_treedepth=10, on_error='summary', - early_max_treedepth=8, + def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs): R"""Set up the No-U-Turn sampler. @@ -101,19 +98,25 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8, Emax : float, default 1000 Maximum energy change allowed during leapfrog steps. Larger deviations will abort the integration. - target_accept : float (0,1), default .8 - Try to find a step size such that the average acceptance + target_accept : float, default .8 + Adapt the step size such that the average acceptance probability across the trajectories are close to target_accept. Higher values for target_accept lead to smaller step sizes. + Setting this to higher values like 0.9 or 0.99 can help + with sampling from difficult posteriors. Valid values are + between 0 and 1 (exclusive). step_scale : float, default 0.25 Size of steps to take, automatically scaled down by `1/n**(1/4)`. If step size adaptation is switched off, the resulting step size is used. If adaptation is enabled, it is used as initial guess. gamma : float, default .05 - k : float (.5,1) default .75 - scaling of speed of adaptation + k : float, default .75 + Parameter for dual averaging for step size adaptation. Values + between 0.5 and 1 (exclusive) are admissible. Higher values + correspond to slower adaptation. t0 : int, default 10 - slows initial adaptation + Parameter for dual averaging. Higher values slow initial + adaptation. adapt_step_size : bool, default=True Whether step size adaptation should be enabled. If this is disabled, `k`, `t0`, `gamma` and `target_accept` are ignored. @@ -132,12 +135,6 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8, this will be interpreded as the mass or covariance matrix. is_cov : bool, default=False Treat the scaling as mass or covariance matrix. - on_error : {'summary', 'warn', 'raise'}, default='summary' - How to report problems during sampling. - - * `summary`: Print one warning after sampling. - * `warn`: Print individual warnings as soon as they appear. 
- * `raise`: Raise an error on the first problem. potential : Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. It can be specified instead @@ -153,42 +150,13 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8, `pm.sample` to the desired number of tuning steps. """ super(NUTS, self).__init__(vars, **kwargs) - self.Emax = Emax - self.target_accept = target_accept - self.gamma = gamma - self.k = k - self.t0 = t0 - - self.h_bar = 0 - self.mu = np.log(self.step_size * 10) - self.log_step_size = np.log(self.step_size) - self.log_step_size_bar = 0 - self.m = 1 - self.adapt_step_size = adapt_step_size self.max_treedepth = max_treedepth self.early_max_treedepth = early_max_treedepth + self._reached_max_treedepth = 0 - self.tune = True - self.report = NutsReport(on_error, max_treedepth, target_accept) - - def astep(self, q0): - """Perform a single NUTS iteration.""" - p0 = self.potential.random() - start = self.integrator.compute_state(q0, p0) - - if not np.isfinite(start.energy): - raise ValueError('Bad initial energy: %s. The model ' - 'might be misspecified.' % start.energy) - - if not self.adapt_step_size: - step_size = self.step_size - elif self.tune: - step_size = np.exp(self.log_step_size) - else: - step_size = np.exp(self.log_step_size_bar) - - if self.tune and self.m < 200: + def _hamiltonian_step(self, start, p0, step_size): + if self.tune and self.iter_count < 200: max_treedepth = self.early_max_treedepth else: max_treedepth = self.max_treedepth @@ -197,38 +165,16 @@ def astep(self, q0): for _ in range(max_treedepth): direction = logbern(np.log(0.5)) * 2 - 1 - diverging_info, turning = tree.extend(direction) - q, q_grad = tree.proposal.q, tree.proposal.q_grad + divergence_info, turning = tree.extend(direction) - if diverging_info or turning: - if diverging_info: - self.report._add_divergence(self.tune, *diverging_info) + if divergence_info or turning: break + else: + self._reached_max_treedepth += 1 - w = 1. / (self.m + self.t0) - self.h_bar = ((1 - w) * self.h_bar + - w * (self.target_accept - tree.accept_sum * 1. / tree.n_proposals)) - - if self.tune: - self.log_step_size = self.mu - self.h_bar * np.sqrt(self.m) / self.gamma - mk = self.m ** -self.k - self.log_step_size_bar = mk * self.log_step_size + (1 - mk) * self.log_step_size_bar - - self.m += 1 - - if self.tune: - self.potential.adapt(q, q_grad) - - stats = { - 'step_size': step_size, - 'tune': self.tune, - 'step_size_bar': np.exp(self.log_step_size_bar), - 'diverging': bool(diverging_info), - } - - stats.update(tree.stats()) - - return q, [stats] + stats = tree.stats() + accept_stat = stats['mean_tree_accept'] + return HMCStepData(tree.proposal, accept_stat, divergence_info, stats) @staticmethod def competence(var, has_grad): @@ -237,6 +183,17 @@ def competence(var, has_grad): return Competence.IDEAL return Competence.INCOMPATIBLE + def warnings(self, strace): + warnings = super(NUTS, self).warnings(strace) + + if np.mean(self._reached_max_treedepth) > 0.05: + msg = ('The chain reached the maximum tree depth. Increase ' + 'max_treedepth, increase target_accept or reparameterize.') + warn = SamplerWarning(WarningType.TREEDEPTH, msg, 'warn', + None, None, None) + warnings.append(warn) + return warnings + # A proposal for the next position Proposal = namedtuple("Proposal", "q, q_grad, energy, p_accept") @@ -285,10 +242,9 @@ def extend(self, direction): If direction is larger than 0, extend it to the right, otherwise extend it to the left. 
- Return a tuple `(diverging, turning)`. `diverging` indicates if the - tree extension was aborted because the energy change exceeded - `self.Emax`. If so, it is a tuple containing details about the reason. - Otherwise, it will be `False`. `turning` indicates that + Return a tuple `(diverging, turning)` of type (DivergenceInfo, bool). + `diverging` indicates, that the tree extension was aborted because + the energy change exceeded `self.Emax`. `turning` indicates that the tree extension was stopped because the termination criterior was reached (the trajectory is turning back). """ @@ -325,17 +281,9 @@ def _single_step(self, left, epsilon): """Perform a leapfrog step and handle error cases.""" try: right = self.integrator.step(epsilon, left) - except linalg.LinAlgError as err: - error_msg = "LinAlgError during leapfrog step." + except IntegrationError as err: + error_msg = str(err) error = err - except ValueError as err: - # Raised by many scipy.linalg functions - scipy_msg = "array must not contain infs or nans" - if len(err.args) > 0 and scipy_msg in err.args[0].lower(): - error_msg = "Infs or nans in scipy.linalg during leapfrog step." - error = err - else: - raise else: energy_change = right.energy - self.start_energy if np.isnan(energy_change): @@ -346,25 +294,30 @@ def _single_step(self, left, epsilon): if np.abs(energy_change) < self.Emax: p_accept = min(1, np.exp(-energy_change)) log_size = -energy_change - proposal = Proposal(right.q, right.q_grad, right.energy, p_accept) - tree = Subtree(right, right, right.p, proposal, log_size, p_accept, 1) - return tree, False, False + proposal = Proposal( + right.q, right.q_grad, right.energy, p_accept) + tree = Subtree(right, right, right.p, + proposal, log_size, p_accept, 1) + return tree, None, False else: - error_msg = ("Energy change in leapfrog step is too large: %s. " + error_msg = ("Energy change in leapfrog step is too large: %s." 
% energy_change) error = None tree = Subtree(None, None, None, None, -np.inf, 0, 1) - return tree, (error_msg, error, left), False + divergance_info = DivergenceInfo(error_msg, error, left) + return tree, divergance_info, False def _build_subtree(self, left, depth, epsilon): if depth == 0: return self._single_step(left, epsilon) - tree1, diverging, turning = self._build_subtree(left, depth - 1, epsilon) + tree1, diverging, turning = self._build_subtree( + left, depth - 1, epsilon) if diverging or turning: return tree1, diverging, turning - tree2, diverging, turning = self._build_subtree(tree1.right, depth - 1, epsilon) + tree2, diverging, turning = self._build_subtree( + tree1.right, depth - 1, epsilon) left, right = tree1.left, tree2.right @@ -385,7 +338,8 @@ def _build_subtree(self, left, depth, epsilon): accept_sum = tree1.accept_sum + tree2.accept_sum n_proposals = tree1.n_proposals + tree2.n_proposals - tree = Subtree(left, right, p_sum, proposal, log_size, accept_sum, n_proposals) + tree = Subtree(left, right, p_sum, proposal, + log_size, accept_sum, n_proposals) return tree, diverging, turning def stats(self): @@ -397,94 +351,3 @@ def stats(self): 'tree_size': self.n_proposals, 'max_energy_error': self.max_energy_change, } - - -class NutsReport(object): - def __init__(self, on_error, max_treedepth, target_accept): - if on_error not in ['summary', 'raise', 'warn']: - raise ValueError('Invalid value for on_error.') - self._on_error = on_error - self._max_treedepth = max_treedepth - self._target_accept = target_accept - self._chain_id = None - self._divs_tune = [] - self._divs_after_tune = [] - - def _add_divergence(self, tuning, msg, error=None, point=None): - if tuning: - self._divs_tune.append((msg, error, point)) - else: - self._divs_after_tune.append((msg, error, point)) - if self._on_error == 'raise': - err = SamplingError('Divergence after tuning: %s Increase ' - 'target_accept or reparameterize.' % msg) - six.raise_from(err, error) - elif self._on_error == 'warn': - warnings.warn('Divergence detected: %s Increase target_accept ' - 'or reparameterize.' % msg) - - def _check_len(self, tuning): - n = (~tuning).sum() - if n < 500: - warnings.warn('Chain %s contains only %s samples.' - % (self._chain_id, n)) - if np.all(tuning): - warnings.warn('Step size tuning was enabled throughout the whole ' - 'trace. You might want to specify the number of ' - 'tuning steps.') - if n > 0 and n == len(self._divs_after_tune): - warnings.warn('Chain %s contains only diverging samples. ' - 'The model is probably misspecified.' - % self._chain_id) - - def _check_accept(self, accept): - mean_accept = np.mean(accept) - target_accept = self._target_accept - # Try to find a reasonable interval for acceptable acceptance - # probabilities. Finding this was mostry trial and error. - n_bound = min(100, len(accept)) - n_good, n_bad = mean_accept * n_bound, (1 - mean_accept) * n_bound - lower, upper = stats.beta(n_good + 1, n_bad + 1).interval(0.95) - if target_accept < lower or target_accept > upper: - warnings.warn('The acceptance probability in chain %s does not ' - 'match the target. It is %s, but should be close ' - 'to %s. Try to increase the number of tuning steps.' - % (self._chain_id, mean_accept, target_accept)) - - def _check_depth(self, depth): - if len(depth) == 0: - return - if np.mean(depth == self._max_treedepth) > 0.05: - warnings.warn('Chain %s reached the maximum tree depth. Increase ' - 'max_treedepth, increase target_accept or ' - 'reparameterize.' 
% self._chain_id) - - def _check_divergence(self): - n_diverging = len(self._divs_after_tune) - if n_diverging > 0: - warnings.warn("Chain %s contains %s diverging samples after " - "tuning. If increasing `target_accept` does not help " - "try to reparameterize." - % (self._chain_id, n_diverging)) - - def _finalize(self, strace): - """Print warnings for obviously problematic chains.""" - self._chain_id = strace.chain - - if strace.supports_sampler_stats: - tuning = strace.get_sampler_stats('tune') - if tuning.ndim == 2: - tuning = np.any(tuning, axis=-1) - - accept = strace.get_sampler_stats('mean_tree_accept') - if accept.ndim == 2: - accept = np.mean(accept, axis=-1) - - depth = strace.get_sampler_stats('depth') - if depth.ndim == 2: - depth = np.max(depth, axis=-1) - - self._check_len(tuning) - self._check_depth(depth[~tuning]) - self._check_accept(accept[~tuning]) - self._check_divergence() diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 01b7eb6b1d..1eaf165014 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -86,7 +86,7 @@ def random(self, x): def velocity_energy(self, x, v_out): raise NotImplementedError('Abstract method') - def adapt(self, sample, grad): + def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning. This can be used by adaptive potentials to change the @@ -94,6 +94,12 @@ def adapt(self, sample, grad): """ pass + def raise_ok(self): + pass + + def reset(self): + pass + def isquadpotential(value): """Check whether an object might be a QuadPotential object.""" @@ -104,7 +110,7 @@ class QuadPotentialDiagAdapt(QuadPotential): """Adapt a diagonal mass matrix from the sample variances.""" def __init__(self, n, initial_mean, initial_diag=None, initial_weight=0, - adaptation_window=100, dtype=None): + adaptation_window=101, dtype=None): """Set up a diagonal mass matrix.""" if initial_diag is not None and initial_diag.ndim != 1: raise ValueError('Initial diagonal must be one-dimensional.') @@ -162,8 +168,11 @@ def _update_from_weightvar(self, weightvar): np.divide(1, self._stds, out=self._inv_stds) self._var_theano.set_value(self._var) - def adapt(self, sample, grad): + def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning.""" + if not tune: + return + window = self.adaptation_window self._foreground_var.add_sample(sample, weight=1) @@ -176,6 +185,16 @@ def adapt(self, sample, grad): self._n_samples += 1 + def raise_ok(self): + if np.any(self._stds == 0): + raise ValueError('Mass matrix contains zeros on the diagonal. ' + 'Some derivatives might always be zero.') + if np.any(self._stds < 0): + raise ValueError('Mass matrix contains negative values on the ' + 'diagonal.') + if np.any(~np.isfinite(self._stds)): + raise ValueError('Mass matrix contains non-finite values.') + class QuadPotentialDiagAdaptGrad(QuadPotentialDiagAdapt): """Adapt a diagonal mass matrix from the variances of the gradients. 
@@ -196,15 +215,18 @@ def _update(self, var): np.divide(1, self._stds, out=self._inv_stds) self._var_theano.set_value(self._var) - def adapt(self, sample, grad): + def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning.""" + if not tune: + return + self._grads1[:] += np.abs(grad) self._grads2[:] += np.abs(grad) self._ngrads1 += 1 self._ngrads2 += 1 if self._n_samples <= 150: - super().adapt(sample, grad) + super().update(sample, grad) else: self._update((self._ngrads1 / self._grads1) ** 2) diff --git a/pymc3/step_methods/step_sizes.py b/pymc3/step_methods/step_sizes.py new file mode 100644 index 0000000000..b063216baf --- /dev/null +++ b/pymc3/step_methods/step_sizes.py @@ -0,0 +1,67 @@ +import math + +import numpy as np +from scipy import stats + +from pymc3.backends.report import SamplerWarning, WarningType + + +class DualAverageAdaptation(object): + def __init__(self, initial_step, target, gamma, k, t0): + self._log_step = math.log(initial_step) + self._log_bar = self._log_step + self._target = target + self._hbar = 0. + self._k = k + self._t0 = t0 + self._count = 1 + self._mu = math.log(10 * initial_step) + self._gamma = gamma + self._tuned_stats = [] + + def current(self, tune): + if tune: + return math.exp(self._log_step) + else: + return math.exp(self._log_bar) + + def update(self, accept_stat, tune): + if not tune: + self._tuned_stats.append(accept_stat) + return + + count, k, t0 = self._count, self._k, self._t0 + w = 1. / (count + t0) + self._hbar = ((1 - w) * self._hbar + w * (self._target - accept_stat)) + + self._log_step = self._mu - self._hbar * math.sqrt(count) / self._gamma + mk = count ** -k + self._log_bar = mk * self._log_step + (1 - mk) * self._log_bar + self._count += 1 + + def stats(self): + return { + 'step_size': math.exp(self._log_step), + 'step_size_bar': math.exp(self._log_bar), + } + + def warnings(self): + accept = np.array(self._tuned_stats) + mean_accept = np.mean(accept) + target_accept = self._target + # Try to find a reasonable interval for acceptable acceptance + # probabilities. Finding this was mostry trial and error. + n_bound = min(100, len(accept)) + n_good, n_bad = mean_accept * n_bound, (1 - mean_accept) * n_bound + lower, upper = stats.beta(n_good + 1, n_bad + 1).interval(0.95) + if target_accept < lower or target_accept > upper: + msg = ('The acceptance probability does not match the target. It ' + 'is %s, but should be close to %s. Try to increase the ' + 'number of tuning steps.' 
+                   % (mean_accept, target_accept))
+            info = {'target': target_accept, 'actual': mean_accept}
+            warning = SamplerWarning(
+                WarningType.BAD_ACCEPTANCE, msg, 'warn', None, None, info)
+            return [warning]
+        else:
+            return []
diff --git a/pymc3/tests/test_diagnostics.py b/pymc3/tests/test_diagnostics.py
index 31ee57f7d2..382012744d 100644
--- a/pymc3/tests/test_diagnostics.py
+++ b/pymc3/tests/test_diagnostics.py
@@ -44,7 +44,7 @@ def test_bad(self):
 
     def test_right_shape_python_float(self, shape=None, test_shape=None):
         """Check Gelman-Rubin statistic shape is correct w/ python float"""
-        n_jobs = 3
+        chains = 3
         n_samples = 5
 
         with Model():
@@ -55,9 +55,9 @@ def test_right_shape_python_float(self, shape=None, test_shape=None):
 
             # start sampling at the MAP
             start = find_MAP()
-            step = NUTS(scaling=start)
+            step = NUTS(scaling=start, step_scale=0.1)
             ptrace = sample(n_samples, tune=0, step=step, start=start,
-                            njobs=n_jobs, random_seed=42)
+                            chains=chains, random_seed=42)
 
         rhat = gelman_rubin(ptrace)['x']
diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py
index 19557fb38b..dbcbd3aa96 100644
--- a/pymc3/tests/test_examples.py
+++ b/pymc3/tests/test_examples.py
@@ -33,7 +33,8 @@ def get_city_data():
 class TestARM5_4(SeededTest):
     def build_model(self):
         data = pd.read_csv(pm.get_data('wells.dat'),
-                           delimiter=u' ', index_col=u'id', dtype={u'switch': np.int8})
+                           delimiter=u' ', index_col=u'id',
+                           dtype={u'switch': np.int8})
         data.dist /= 100
         data.educ /= 4
         col = data.columns
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 5f2584965f..2cb7c15839 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -264,18 +264,18 @@ def test_sample_ppc_w(self):
 
         with pm.Model() as model_0:
             mu = pm.Normal('mu', mu=0, sd=1)
-            y = pm.Normal('y', mu=mu, sd=1, observed=data0)
+            y = pm.Normal('y', mu=mu, sd=1, observed=data0, shape=500)
             trace_0 = pm.sample()
 
         with pm.Model() as model_1:
             mu = pm.Normal('mu', mu=0, sd=1, shape=len(data0))
-            y = pm.Normal('y', mu=mu, sd=1, observed=data0)
+            y = pm.Normal('y', mu=mu, sd=1, observed=data0, shape=500)
             trace_1 = pm.sample()
 
         traces = [trace_0, trace_0]
         models = [model_0, model_0]
-        ppc = pm.sample_ppc_w(traces, 1000, models)
-        assert ppc['y'].shape == (1000,)
+        ppc = pm.sample_ppc_w(traces, 100, models)
+        assert ppc['y'].shape == (100, 500)
 
         traces = [trace_0, trace_1]
         models = [model_0, model_1]
diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py
index 53ee60d4ec..d2e848c815 100644
--- a/pymc3/tests/test_step.py
+++ b/pymc3/tests/test_step.py
@@ -11,7 +11,6 @@
                                 MultivariateNormalProposal, HamiltonianMC, EllipticalSlice, smc, DEMetropolis)
 from pymc3.theanof import floatX
-from pymc3 import SamplingError
 from pymc3.distributions import (
     Binomial, Normal, Bernoulli, Categorical, Beta, HalfNormal)
@@ -62,26 +61,26 @@ class TestStepMethods(object):  # yield test doesn't work subclassing object
             2.41488213e+00, 4.68298379e-01, 4.86342250e-01, -8.43949966e-01]),
         HamiltonianMC: np.array([
-            -0.74925631, -0.2566773 , -2.12480977, 1.64328926, -1.39315913,
-            2.04200003, 0.00706711, 0.34240498, 0.44276674, -0.21368043,
-            -0.76398723, 1.19280082, -1.43030242, -0.44896107, 0.0547087 ,
-            -1.72170938, -0.20443956, 0.35432546, 1.77695096, -0.31053636,
-            -0.26729283, 1.26450201, 0.17049917, 0.27953939, -0.24185153,
-            0.95617117, -0.45707061, 0.75837366, -1.73391277, 1.63331612,
-            -0.68426038, 0.20499991, -0.43866983, 0.31080195, 0.47104548,
-            -0.50331753, 0.7821196 , -1.7544931 , 1.24106497, -1.0152971 ,
-            -0.01949091, -0.33151479, 0.19138253, 0.40349184, 0.31694823,
-            -0.01508142, -0.31330951, 0.40874228, 0.40874228, 0.58078882,
-            0.68378375, 0.84142914, 0.44756075, -0.87297183, 0.59695222,
-            1.96161733, -0.37126652, 0.27552912, 0.74547583, -0.16172925,
-            0.79969568, -0.20501522, -0.36181518, 0.13114261, -0.8461323 ,
-            -0.07749079, -0.07013026, 0.88022116, -0.5546825 , 0.25232708,
-            0.09483573, 0.84910913, 1.33348018, -1.1971401 , 0.49203123,
-            0.22365435, 1.3801812 , 0.06885929, 1.07115053, -1.52225141,
-            1.50179721, -2.01528399, -1.31610679, -0.32298834, -0.80630885,
-            -0.6828592 , 0.2897919 , 1.64608125, -0.71793662, -0.5233058 ,
-            0.53549836, 0.61119221, 0.24235732, -1.3940593 , 0.28380114,
-            -0.22629978, -0.19318957, 1.12543101, -1.40328285, 0.21054137]),
+            0.40608634, 0.40608634, 0.04610354, -0.78588609, 0.03773683,
+            -0.49373368, 0.21708042, 0.21708042, -0.14413517, -0.68284611,
+            0.76299659, 0.24128663, 0.24128663, -0.54835464, -0.84408365,
+            -0.82762589, -0.67429432, -0.67429432, -0.57900517, -0.97138029,
+            -0.37809745, -0.37809745, -0.19333181, -0.40329098, 0.54999765,
+            1.171515  , 0.90279792, 0.90279792, 1.63830503, -0.90436674,
+            -0.02516293, -0.02516293, -0.22177082, -0.28061216, -0.10158021,
+            0.0807234 , 0.16994063, 0.16994063, 0.4141503 , 0.38505666,
+            -0.25936504, 2.12074192, 2.24467132, 0.9628703 , -1.37044749,
+            0.32983336, -0.55317525, -0.55317525, -0.40295662, -0.40295662,
+            -0.40295662, 0.49076931, 0.04234407, -1.0002905 , 0.99823615,
+            0.99823615, 0.24915904, -0.00965061, 0.48519377, 0.21959942,
+            -0.93094702, -0.93094702, -0.76812553, -0.73699981, -0.91834134,
+            -0.91834134, 0.79522886, -0.04267669, -0.04267669, 0.51368761,
+            0.51368761, 0.02255577, 0.70823409, 0.70823409, 0.73921198,
+            0.30295007, 0.30295007, 0.30295007, -0.1300897 , 0.44310964,
+            -1.35839961, -1.55398633, -0.57323153, -0.57323153, -1.15435458,
+            -0.17697793, -0.17697793, 0.2925856 , -0.56119025, -0.15360141,
+            0.83715916, -0.02340449, -0.02340449, -0.63074456, -0.82745942,
+            -0.67626237, 1.13814805, -0.81857813, -0.81857813, 0.26367166]),
         Metropolis: np.array([
             1.62434536, 1.01258895, 0.4844172, -0.58855142, 1.15626034,
             0.39505344, 1.85716138, -0.20297933, -0.20297933, -0.20297933,
             -0.20297933, -1.08083775, -1.08083775,
@@ -425,17 +424,14 @@ def test_parallelized_chains_are_random(self):
 @pytest.mark.xfail(condition=(theano.config.floatX == "float32"),
                    reason="Fails on float32")
 class TestNutsCheckTrace(object):
-    def test_multiple_samplers(self):
+    def test_multiple_samplers(self, caplog):
         with Model():
             prob = Beta('prob', alpha=5., beta=3.)
             Binomial('outcome', n=1, p=prob)
-            # Catching warnings through multiprocessing doesn't work,
-            # so we have to use single threaded sampling.
-            with pytest.warns(None) as warns:
-                sample(3, tune=2, discard_tuned_samples=False,
-                       n_init=None, chains=1)
-            messages = [warn.message.args[0] for warn in warns]
-            assert any("contains only 3" in msg for msg in messages)
+            caplog.clear()
+            sample(3, tune=2, discard_tuned_samples=False,
+                   n_init=None, chains=1)
+            messages = [msg.msg for msg in caplog.records]
             assert all('boolean index did not' not in msg for msg in messages)
 
     def test_bad_init(self):
@@ -445,24 +441,25 @@ def test_bad_init(self):
                 sample(init=None)
             error.match('Bad initial')
 
-    def test_linalg(self):
+    def test_linalg(self, caplog):
         with Model():
             a = Normal('a', shape=2)
             a = tt.switch(a > 0, np.inf, a)
             b = tt.slinalg.solve(floatX(np.eye(2)), a)
             Normal('c', mu=b, shape=2)
-            # Catching warnings through multiprocessing doesn't work,
-            # so we have to use single threaded sampling.
-            with pytest.warns(None) as warns:
-                trace = sample(20, init=None, tune=5, chains=1)
-            warns = [str(warn.message) for warn in warns]
+            caplog.clear()
+            trace = sample(20, init=None, tune=5, chains=2)
+            warns = [msg.msg for msg in caplog.records]
 
         assert np.any(trace['diverging'])
-        assert any('diverging samples after tuning' in warn
-                   for warn in warns)
-        # FIXME This test fails sporadically on py27.
-        # It seems that capturing warnings doesn't work as expected.
-        # assert any('contains only' in warn for warn in warns)
+        assert (
+            any('Divergences after tuning' in warn
+                for warn in warns)
+            or
+            any('only diverging samples' in warn
+                for warn in warns))
 
-        with pytest.raises(SamplingError):
-            sample(20, init=None, nuts_kwargs={'on_error': 'raise'})
+        with pytest.raises(ValueError) as error:
+            trace.report.raise_ok()
+        error.match('issues during sampling')
+        assert not trace.report.ok
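Note on the test changes above: the sampler messages these tests check are no longer surfaced through the warnings module, so the tests switch from pytest.warns to pytest's built-in caplog fixture. A minimal sketch of that pattern, using a stand-in emitter rather than a full pm.sample run; the 'pymc3' logger name is an assumption here:

    # Sketch only: capture log records with pytest's caplog fixture.
    import logging

    logger = logging.getLogger('pymc3')

    def fake_sample():
        # Stand-in for sampler code that logs a warning message.
        logger.warning('Divergences after tuning')

    def test_warning_is_logged(caplog):
        caplog.clear()
        fake_sample()
        messages = [record.msg for record in caplog.records]
        assert any('Divergences after tuning' in msg for msg in messages)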
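The last hunk also shows the report API that replaces raising SamplingError from inside the sampler: the trace returned by pm.sample carries a report whose ok property and raise_ok() method summarize warnings collected during sampling, with raise_ok() raising ValueError when serious issues were recorded. A short usage sketch; the model and draw counts are illustrative only:

    # Sketch only: checking the sampler report after a run, as exercised by
    # test_linalg above. The model below is a placeholder.
    import pymc3 as pm

    with pm.Model():
        mu = pm.Normal('mu', mu=0., sd=1.)
        pm.Normal('obs', mu=mu, sd=1., observed=[0.1, -0.3, 0.2])
        trace = pm.sample(500, tune=500, chains=2)

    if not trace.report.ok:
        # Raises ValueError if serious convergence issues were recorded.
        trace.report.raise_ok()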