From 7b5d5703dc976b088075f4391271b071ae60f6ae Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 12 Apr 2017 22:36:42 +0300 Subject: [PATCH 01/28] add sample_vp, deprecate old ADVI --- pymc3/variational/__init__.py | 2 +- pymc3/variational/advi.py | 3 +++ pymc3/variational/advi_minibatch.py | 3 +++ pymc3/variational/approximations.py | 39 ++++++++++++++++++++++++++++- pymc3/variational/opvi.py | 2 +- 5 files changed, 46 insertions(+), 3 deletions(-) diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py index 1df84767c1..233f36b6c9 100644 --- a/pymc3/variational/__init__.py +++ b/pymc3/variational/__init__.py @@ -1,4 +1,4 @@ -from .advi import advi, sample_vp +from .advi import advi from .advi_minibatch import advi_minibatch from .updates import ( diff --git a/pymc3/variational/advi.py b/pymc3/variational/advi.py index 759c877183..165431b311 100644 --- a/pymc3/variational/advi.py +++ b/pymc3/variational/advi.py @@ -108,6 +108,9 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, and Blei, D. M. (2016). Automatic Differentiation Variational Inference. arXiv preprint arXiv:1603.00788. """ + import warnings + warnings.warn('Old ADVI interface is deprecated and be removed in future, use pm.ADVI instead', + DeprecationWarning, stacklevel=2) model = pm.modelcontext(model) if start is None: start = model.test_point diff --git a/pymc3/variational/advi_minibatch.py b/pymc3/variational/advi_minibatch.py index fad8cf5561..b97e04a6c5 100644 --- a/pymc3/variational/advi_minibatch.py +++ b/pymc3/variational/advi_minibatch.py @@ -436,6 +436,9 @@ def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1, Weight Uncertainty in Neural Network. In Proceedings of the 32nd International Conference on Machine Learning (ICML-15) (pp. 1613-1622). """ + import warnings + warnings.warn('Old ADVI interface is deprecated and be removed in future, use pm.ADVI instead', + DeprecationWarning, stacklevel=2) if encoder_params is None: encoder_params = [] diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 957ff3fbbb..52ff70e427 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -12,7 +12,8 @@ __all__ = [ 'MeanField', 'FullRank', - 'Histogram' + 'Histogram', + 'sample_vp' ] @@ -353,3 +354,39 @@ def from_noise(cls, size, jitter=.01, local_rv=None, start=None, model=None): x0 += np.random.normal(0, jitter, x0.shape) hist.histogram.set_value(x0) return hist + + +def sample_vp(approx, draws=100, hide_transformed=False, **kwargs): + """ + Draw samples from variational posterior. + + Parameters + ---------- + approx : Approximation + draws : int + Number of random samples. + hide_transformed : bool + If False, transformed variables are also sampled. Default is True. + + Returns + ------- + trace : pymc3.backends.base.MultiTrace + Samples drawn from variational posterior. 
+ """ + if approx.__class__.__name__ == 'ADVIFit': + import warnings + warnings.warn('Old ADVI interface is deprecated and be removed in future', + DeprecationWarning, stacklevel=2) + _approx = approx + model = kwargs.get('model') + local_rv = kwargs.get('local_RVs') + approx = MeanField(model=model, local_rv=local_rv) + bij = DictToArrayBijection(approx.order, {}) + means = bij.map(_approx.means) + stds = bij.map(_approx.stds) + rhos = np.log(np.exp(stds) - 1) + approx.mean.set_value(means.astype(approx.mean.dtype)) + approx.rho.set_value(rhos.astype(approx.rho.dtype)) + if not isinstance(approx, Approximation): + raise TypeError('Need Approximation instance, got %r' % approx) + return approx.sample_vp(draws=draws, hide_transformed=hide_transformed) diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index 71b2e6dda9..24c5b182d1 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -811,7 +811,7 @@ def sample_vp(self, draws=1, hide_transformed=False): Returns ------- trace : pymc3.backends.base.MultiTrace - Samples drawn from the variational posterior. + Samples drawn from variational posterior. """ if hide_transformed: vars_sampled = [v_ for v_ in self.model.unobserved_RVs From 31e524da47998b1fcbbafb62f1682612f8900935 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 12 Apr 2017 22:37:06 +0300 Subject: [PATCH 02/28] found typo --- pymc3/variational/approximations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 52ff70e427..133d4b3c65 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -56,7 +56,7 @@ def rho(self): @property def cov(self): - return tt.diag(rho2sd(self.rho)) + return tt.diag(rho2sd(self.rho)**2) def create_shared_params(self): return {'mu': theano.shared( From ea1cd8286070f5402ebbb0aa0e3088f142054a43 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 12 Apr 2017 22:38:27 +0300 Subject: [PATCH 03/28] fix docs --- pymc3/variational/approximations.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 133d4b3c65..8fe2bb087b 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -70,8 +70,6 @@ def create_shared_params(self): def log_q_W_global(self, z): """ log_q_W samples over q for global vars - Gradient wrt mu, rho in density parametrization - is set to zero to lower variance of ELBO """ mu = self.scale_grad(self.mean) rho = self.scale_grad(self.rho) @@ -165,8 +163,6 @@ def create_shared_params(self): def log_q_W_global(self, z): """ log_q_W samples over q for global vars - Gradient wrt mu, rho in density parametrization - is set to zero to lower variance of ELBO """ mu = self.scale_grad(self.mean) L = self.scale_grad(self.L) From f952cdd0f2375803416546dd014c4e5233197c70 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 12 Apr 2017 22:56:40 +0300 Subject: [PATCH 04/28] add sample_vp to __init__ --- pymc3/variational/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py index 233f36b6c9..09a341a0bf 100644 --- a/pymc3/variational/__init__.py +++ b/pymc3/variational/__init__.py @@ -25,7 +25,8 @@ from .approximations import ( Histogram, FullRank, - MeanField + MeanField, + sample_vp ) from . 
import approximations From b9d0a03af707037956e5f7d882fc05b6ced9d020 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 13 Apr 2017 19:26:52 +0300 Subject: [PATCH 05/28] typo --- pymc3/variational/advi.py | 3 ++- pymc3/variational/approximations.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pymc3/variational/advi.py b/pymc3/variational/advi.py index 165431b311..f2627dfa3c 100644 --- a/pymc3/variational/advi.py +++ b/pymc3/variational/advi.py @@ -109,7 +109,8 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, Inference. arXiv preprint arXiv:1603.00788. """ import warnings - warnings.warn('Old ADVI interface is deprecated and be removed in future, use pm.ADVI instead', + warnings.warn('Old ADVI interface is deprecated and will ' + 'be removed in future, use pm.ADVI instead', DeprecationWarning, stacklevel=2) model = pm.modelcontext(model) if start is None: diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 8fe2bb087b..0ed17a5ed8 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -371,7 +371,7 @@ def sample_vp(approx, draws=100, hide_transformed=False, **kwargs): """ if approx.__class__.__name__ == 'ADVIFit': import warnings - warnings.warn('Old ADVI interface is deprecated and be removed in future', + warnings.warn('Old ADVI interface is deprecated and will be removed in future', DeprecationWarning, stacklevel=2) _approx = approx model = kwargs.get('model') From 35afe54b6e46d99988ccf2d772da414939df2d69 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 13 Apr 2017 20:23:30 +0300 Subject: [PATCH 06/28] refactor init nuts, and needed stuff --- pymc3/sampling.py | 21 ++++++++------ pymc3/variational/approximations.py | 33 +++++++++++++--------- pymc3/variational/inference.py | 44 +++++++++++++++++++++++++++++ pymc3/variational/opvi.py | 18 ++---------- 4 files changed, 79 insertions(+), 37 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index fbea31d41e..7b36fbebf7 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -24,6 +24,7 @@ STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis, BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis) + def assign_step_methods(model, step=None, methods=STEP_METHODS, step_kwargs=None): """Assign model variables to appropriate step methods. 
@@ -566,19 +567,21 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, init = init.lower() if init == 'advi': - v_params = pm.variational.advi(n=n_init, random_seed=random_seed, - progressbar=progressbar) - start = pm.variational.sample_vp(v_params, njobs, progressbar=False, - hide_transformed=False, - random_seed=random_seed) + approx = pm.fit( + n=n_init, method='advi', model=model + ) # type: pm.MeanField + start = approx.sample_vp(draws=njobs) + cov = approx.cov.eval() if njobs == 1: start = start[0] - cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'advi_map': start = pm.find_MAP() - v_params = pm.variational.advi(n=n_init, start=start, - random_seed=random_seed) - cov = np.power(model.dict_to_array(v_params.stds), 2) + approx = pm.MeanField(model=model, start=start) + pm.fit(n=n_init, method=pm.ADVI.from_mean_field(approx)) + start = approx.sample_vp(draws=n_init) + cov = approx.cov.eval() + if njobs == 1: + start = start[0] elif init == 'map': start = pm.find_MAP() cov = pm.find_hessian(point=start) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 0ed17a5ed8..80accb97a6 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -33,6 +33,9 @@ class MeanField(Approximation): model : PyMC3 model for inference + start : Point + initial mean + cost_part_grad_scale : float or scalar tensor Scaling score part of gradient can be useful near optimum for archiving better convergence properties. Common schedule is @@ -58,9 +61,10 @@ def rho(self): def cov(self): return tt.diag(rho2sd(self.rho)**2) - def create_shared_params(self): + def create_shared_params(self, **kwargs): + start = self.gbij(kwargs.get('start', self.model.test_point)) return {'mu': theano.shared( - pm.floatX(self.input.tag.test_value[self.global_slc]), + pm.floatX(start), 'mu'), 'rho': theano.shared( np.zeros((self.global_size,), dtype=theano.config.floatX), @@ -100,6 +104,9 @@ class FullRank(Approximation): model : PyMC3 model for inference + start : Point + initial mean + cost_part_grad_scale : float or scalar tensor Scaling score part of gradient can be useful near optimum for archiving better convergence properties. 
Common schedule is @@ -147,16 +154,15 @@ def tril_index_matrix(self): tril_index_matrix[np.tril_indices(n)[::-1]] = np.arange(num_tril_entries) return tril_index_matrix - def create_shared_params(self): + def create_shared_params(self, **kwargs): + start = self.gbij(kwargs.get('start', self.model.test_point)) n = self.global_size L_tril = ( np.eye(n) [np.tril_indices(n)] .astype(theano.config.floatX) ) - return {'mu': theano.shared( - self.input.tag.test_value[self.global_slc], - 'mu'), + return {'mu': theano.shared(pm.floatX(start), 'mu'), 'L_tril': theano.shared(L_tril, 'L_tril') } @@ -251,18 +257,14 @@ def check_model(self, model, **kwargs): for var in model.free_RVs])): raise ValueError('trace has not all FreeRV') - def _setup(self, **kwargs): - self._histogram_order = ArrayOrdering(self.global_vars) - self._bij = DictToArrayBijection(self._histogram_order, dict()) - def create_shared_params(self, **kwargs): trace = kwargs.get('trace') if trace is None: - histogram = np.atleast_2d(self._bij.map(self.model.test_point)) + histogram = np.atleast_2d(self.gbij.map(self.model.test_point)) else: histogram = np.empty((len(trace), self.global_size)) for i in range(len(trace)): - histogram[i] = self._bij.map(trace[i]) + histogram[i] = self.gbij.map(trace[i]) return theano.shared(pm.floatX(histogram), 'histogram') def randidx(self, size=None): @@ -320,6 +322,11 @@ def mapping(z): def mean(self): return self.histogram.mean(0) + @property + def cov(self): + x = (self.histogram - self.mean) + return x.T.dot(x) / self.histogram.shape[0] + @classmethod def from_noise(cls, size, jitter=.01, local_rv=None, start=None, model=None): """ @@ -344,7 +351,7 @@ def from_noise(cls, size, jitter=.01, local_rv=None, start=None, model=None): hist = cls(None, local_rv=local_rv, model=model) if start is None: start = hist.model.test_point - start = hist._bij.map(start) + start = hist.gbij.map(start) # Initialize particles x0 = np.tile(start, (size, 1)) x0 += np.random.normal(0, jitter, x0.shape) diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index e0bb005de3..cf2ffb2fe6 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -202,6 +202,28 @@ def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1): KL, MeanField, None, local_rv=local_rv, model=model, cost_part_grad_scale=cost_part_grad_scale) + @classmethod + def from_mean_field(cls, mean_field): + """ + Construct ADVI from MeanField approximation + + Parameters + ---------- + mean_field : MeanField + approximation to start with + + Returns + ------- + ADVI + """ + if not isinstance(mean_field, MeanField): + raise TypeError('Expected MeanField, got %r' % mean_field) + inference = object.__new__(cls) + objective = KL(mean_field)(None) + inference.hist = np.asarray(()) + inference.objective = objective + return inference + class FullRankADVI(Inference): """ @@ -241,6 +263,28 @@ def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1, gpu_compat KL, FullRank, None, local_rv=local_rv, model=model, cost_part_grad_scale=cost_part_grad_scale, gpu_compat=gpu_compat) + @classmethod + def from_full_rank(cls, full_rank): + """ + Construct FullRankADVI from FullRank approximation + + Parameters + ---------- + full_rank : FullRank + approximation to start with + + Returns + ------- + FullRankADVI + """ + if not isinstance(full_rank, FullRank): + raise TypeError('Expected MeanField, got %r' % full_rank) + inference = object.__new__(cls) + objective = KL(full_rank)(None) + inference.hist = 
np.asarray(()) + inference.objective = objective + return inference + @classmethod def from_mean_field(cls, mean_field, gpu_compat=False): """ diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index 24c5b182d1..ecf60a5ab5 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -6,7 +6,7 @@ import pymc3 as pm from .updates import adam from ..distributions.dist_math import rho2sd, log_normal -from ..model import modelcontext, ArrayOrdering +from ..model import modelcontext, ArrayOrdering, DictToArrayBijection from ..theanof import tt_rng, memoize, change_flags, GradScale @@ -487,6 +487,8 @@ def get_transformed(v): self.local_vars = self.get_local_vars(**kwargs) self.global_vars = self.get_global_vars(**kwargs) self.order = ArrayOrdering(self.local_vars + self.global_vars) + self.gbij = DictToArrayBijection(ArrayOrdering(self.global_vars), {}) + self.lbij = DictToArrayBijection(ArrayOrdering(self.local_vars), {}) self.flat_view = model.flatten( vars=self.local_vars + self.global_vars ) @@ -509,20 +511,6 @@ def get_global_vars(self, **kwargs): def get_local_vars(self, **kwargs): return [v for v in self.model.free_RVs if v in self.known] - def __getstate__(self): - state = self.__dict__.copy() - # can be inferred from the rest parts - state.pop('flat_view') - state.pop('order') - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self.order = ArrayOrdering(self.local_vars + self.global_vars) - self.flat_view = self.model.flatten( - vars=self.local_vars + self.global_vars - ) - _view = property(lambda self: self.flat_view.view) input = property(lambda self: self.flat_view.input) From 5c4edd5fe10047ae647310c75b0b1c8389233b58 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 14 Apr 2017 01:51:55 +0300 Subject: [PATCH 07/28] refactor sampling --- pymc3/sampling.py | 6 ++-- pymc3/tests/test_variational_inference.py | 4 ++- pymc3/variational/__init__.py | 1 + pymc3/variational/approximations.py | 4 +-- pymc3/variational/callbacks.py | 37 +++++++++++++++++++++++ pymc3/variational/inference.py | 11 ++++--- 6 files changed, 53 insertions(+), 10 deletions(-) create mode 100644 pymc3/variational/callbacks.py diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 7b36fbebf7..aeb426dcfb 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -568,7 +568,8 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, if init == 'advi': approx = pm.fit( - n=n_init, method='advi', model=model + n=n_init, method='advi', model=model, + callbacks=[pm.callbacks.CheckLossConvergence()] ) # type: pm.MeanField start = approx.sample_vp(draws=njobs) cov = approx.cov.eval() @@ -577,7 +578,8 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, elif init == 'advi_map': start = pm.find_MAP() approx = pm.MeanField(model=model, start=start) - pm.fit(n=n_init, method=pm.ADVI.from_mean_field(approx)) + pm.fit(n=n_init, method=pm.ADVI.from_mean_field(approx), + callbacks=[pm.callbacks.CheckLossConvergence()]) start = approx.sample_vp(draws=n_init) cov = approx.cov.eval() if njobs == 1: diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index e7d1a3a996..c2f98f7b50 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -145,7 +145,9 @@ def test_optimizer_with_full_data(self): Normal('x', mu=mu_, sd=sd, observed=data) inf = self.inference() inf.fit(10) - approx = inf.fit(self.NITER, obj_optimizer=self.optimizer) + approx = inf.fit(self.NITER, 
+ obj_optimizer=self.optimizer, + callbacks=[pm.callbacks.CheckLossConvergence()]) trace = approx.sample_vp(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1) np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4) diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py index 09a341a0bf..d2a58ec491 100644 --- a/pymc3/variational/__init__.py +++ b/pymc3/variational/__init__.py @@ -35,3 +35,4 @@ from . import opvi from . import updates from . import inference +from . import callbacks diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 80accb97a6..ca241061a7 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -62,7 +62,7 @@ def cov(self): return tt.diag(rho2sd(self.rho)**2) def create_shared_params(self, **kwargs): - start = self.gbij(kwargs.get('start', self.model.test_point)) + start = self.gbij.map(kwargs.get('start', self.model.test_point)) return {'mu': theano.shared( pm.floatX(start), 'mu'), @@ -155,7 +155,7 @@ def tril_index_matrix(self): return tril_index_matrix def create_shared_params(self, **kwargs): - start = self.gbij(kwargs.get('start', self.model.test_point)) + start = self.gbij.map(kwargs.get('start', self.model.test_point)) n = self.global_size L_tril = ( np.eye(n) diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py new file mode 100644 index 0000000000..f33379aa41 --- /dev/null +++ b/pymc3/variational/callbacks.py @@ -0,0 +1,37 @@ +import scipy.stats as stats + + +class Callback(object): + def __call__(self, approx, loss, i): + raise NotImplementedError + + +class CheckLossConvergence(Callback): + def __init__(self, every=100, window_size=1000, tolerance=1e-3): + """ + + Parameters + ---------- + every : int + how often check convergence + window_size : + last elbos to take + tolerance : float + Error rate under null hypothesis, consider taking small values + """ + self.every = every + self.window_size = window_size + self.critical = tolerance / 2. 
+ + def __call__(self, approx, hist, i): + if hist is None or i < self.window_size or i % self.every: + return + diff = hist[-self.window_size:] - hist[-self.window_size-1:-1] + mean = diff.mean() + # unbiased std of mean + std = diff.std() / (self.window_size - 1) + t = abs(mean / std) + p = stats.t.cdf(t, df=self.window_size) - .5 + # 1 - confidence is lower allowed p + if p < self.critical: + raise StopIteration diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index cf2ffb2fe6..929998cbb3 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -88,7 +88,7 @@ def run_profiling(self, n=1000, score=None, **kwargs): progress.close() return step_func.profile - def fit(self, n=10000, score=None, callbacks=None, callback_every=1, + def fit(self, n=10000, score=None, callbacks=None, **kwargs): """ Performs Operator Variational Inference @@ -129,9 +129,8 @@ def fit(self, n=10000, score=None, callbacks=None, callback_every=1, if i % 10 == 0: avg_loss = scores[max(0, i - 1000):i+1].mean() progress.set_description('Average Loss = {:,.5g}'.format(avg_loss)) - if i % callback_every == 0: - for callback in callbacks: - callback(self.approx, scores[:i+1], i) + for callback in callbacks: + callback(self.approx, scores[:i+1], i) except (KeyboardInterrupt, StopIteration): # pragma: no cover # do not print log on the same line progress.close() @@ -156,7 +155,9 @@ def fit(self, n=10000, score=None, callbacks=None, callback_every=1, try: for _ in progress: step_func() - except KeyboardInterrupt: + for callback in callbacks: + callback(self.approx, None, i) + except (KeyboardInterrupt, StopIteration): pass finally: progress.close() From 5e12767c086c062b60b8a9ed8cb654e843d4d9f7 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 14 Apr 2017 02:11:30 +0300 Subject: [PATCH 08/28] typo --- pymc3/variational/callbacks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py index f33379aa41..16683b959a 100644 --- a/pymc3/variational/callbacks.py +++ b/pymc3/variational/callbacks.py @@ -1,4 +1,5 @@ import scipy.stats as stats +import numpy as np class Callback(object): @@ -26,10 +27,11 @@ def __init__(self, every=100, window_size=1000, tolerance=1e-3): def __call__(self, approx, hist, i): if hist is None or i < self.window_size or i % self.every: return - diff = hist[-self.window_size:] - hist[-self.window_size-1:-1] + diff = ((hist[-self.window_size:] - hist[-self.window_size-1:-1]) + / hist[-self.window_size-1:-1]) mean = diff.mean() # unbiased std of mean - std = diff.std() / (self.window_size - 1) + std = diff.std() / (self.window_size - 1)**.5 t = abs(mean / std) p = stats.t.cdf(t, df=self.window_size) - .5 # 1 - confidence is lower allowed p From 2b4442d3d1bbeb07e6ae7ed39c3b4b1280391925 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 14 Apr 2017 10:52:22 +0300 Subject: [PATCH 09/28] unused import --- pymc3/variational/approximations.py | 2 +- pymc3/variational/callbacks.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index ca241061a7..f632e5cbc9 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -3,7 +3,7 @@ from theano import tensor as tt import pymc3 as pm -from pymc3 import ArrayOrdering, DictToArrayBijection +from pymc3 import DictToArrayBijection from pymc3.distributions.dist_math import rho2sd, log_normal, 
log_normal_mv from pymc3.variational.opvi import Approximation from pymc3.theanof import tt_rng, memoize diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py index 16683b959a..fb5c61c402 100644 --- a/pymc3/variational/callbacks.py +++ b/pymc3/variational/callbacks.py @@ -1,5 +1,4 @@ import scipy.stats as stats -import numpy as np class Callback(object): From ac715bf8b0c36abbc357e4d5e0a439aa37ab1545 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 14 Apr 2017 22:13:29 +0300 Subject: [PATCH 10/28] rename sample_approx --- pymc3/sampling.py | 4 ++-- pymc3/tests/test_variational_inference.py | 22 +++++++++++----------- pymc3/variational/__init__.py | 4 ++-- pymc3/variational/advi.py | 8 ++++++-- pymc3/variational/approximations.py | 18 ++---------------- pymc3/variational/opvi.py | 2 +- 6 files changed, 24 insertions(+), 34 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index aeb426dcfb..a0f3530d9b 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -571,7 +571,7 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, n=n_init, method='advi', model=model, callbacks=[pm.callbacks.CheckLossConvergence()] ) # type: pm.MeanField - start = approx.sample_vp(draws=njobs) + start = approx.sample(draws=njobs) cov = approx.cov.eval() if njobs == 1: start = start[0] @@ -580,7 +580,7 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, approx = pm.MeanField(model=model, start=start) pm.fit(n=n_init, method=pm.ADVI.from_mean_field(approx), callbacks=[pm.callbacks.CheckLossConvergence()]) - start = approx.sample_vp(draws=n_init) + start = approx.sample(draws=n_init) cov = approx.cov.eval() if njobs == 1: start = start[0] diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index c2f98f7b50..b513437505 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -59,7 +59,7 @@ def _test_aevb(self): with model: inference = self.inference(local_rv={x: (mu, rho)}) approx = inference.fit(3, obj_n_mc=2, obj_optimizer=self.optimizer) - approx.sample_vp(10) + approx.sample(10) approx.apply_replacements( y, more_replacements={x: np.asarray([1, 1], dtype=x.dtype)} @@ -105,17 +105,17 @@ def test_vars_view_dynamic_size_numpy(self): x_sampled = app.view(app.random_fn(), 'x') assert x_sampled.shape == () + model['x'].dshape - def test_sample_vp(self): + def test_sample(self): n_samples = 100 xs = np.random.binomial(n=1, p=0.2, size=n_samples) with pm.Model(): p = pm.Beta('p', alpha=1, beta=1) pm.Binomial('xs', n=1, p=p, observed=xs) app = self.inference().approx - trace = app.sample_vp(draws=1, hide_transformed=True) + trace = app.sample(draws=1, hide_transformed=True) assert trace.varnames == ['p'] assert len(trace) == 1 - trace = app.sample_vp(draws=10, hide_transformed=False) + trace = app.sample(draws=10, hide_transformed=False) assert sorted(trace.varnames) == ['p', 'p_logodds_'] assert len(trace) == 10 @@ -148,7 +148,7 @@ def test_optimizer_with_full_data(self): approx = inf.fit(self.NITER, obj_optimizer=self.optimizer, callbacks=[pm.callbacks.CheckLossConvergence()]) - trace = approx.sample_vp(10000) + trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1) np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. 
/ d), rtol=0.4) @@ -175,7 +175,7 @@ def create_minibatch(data): Normal('x', mu=mu_, sd=sd, observed=minibatches, total_size=n) inf = self.inference() approx = inf.fit(self.NITER * 3, obj_optimizer=self.optimizer) - trace = approx.sample_vp(10000) + trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1) np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4) @@ -206,7 +206,7 @@ def cb(*_): Normal('x', mu=mu_, sd=sd, observed=data_t, total_size=n) inf = self.inference() approx = inf.fit(self.NITER * 3, callbacks=[cb], obj_n_mc=10, obj_optimizer=self.optimizer) - trace = approx.sample_vp(10000) + trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4) np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4) @@ -281,9 +281,9 @@ def test_sampling(self): with models.multidimensional_model()[1]: full_rank = FullRankADVI() approx = full_rank.fit(20) - trace0 = approx.sample_vp(10000) + trace0 = approx.sample(10000) histogram = Histogram(trace0) - trace1 = histogram.sample_vp(100000) + trace1 = histogram.sample(100000) np.testing.assert_allclose(trace0['x'].mean(0), trace1['x'].mean(0), atol=0.01) np.testing.assert_allclose(trace0['x'].var(0), trace1['x'].var(0), atol=0.01) @@ -295,9 +295,9 @@ def test_aevb_histogram(self): with model: inference = ADVI(local_rv={x: (mu, rho)}) approx = inference.approx - trace0 = approx.sample_vp(10000) + trace0 = approx.sample(10000) histogram = Histogram(trace0, local_rv={x: (mu, rho)}) - trace1 = histogram.sample_vp(10000) + trace1 = histogram.sample(10000) histogram.random(no_rand=True) histogram.random_fn(no_rand=True) np.testing.assert_allclose(trace0['y'].mean(0), trace1['y'].mean(0), atol=0.02) diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py index d2a58ec491..a61d63c8db 100644 --- a/pymc3/variational/__init__.py +++ b/pymc3/variational/__init__.py @@ -1,4 +1,4 @@ -from .advi import advi +from .advi import advi, sample_vp from .advi_minibatch import advi_minibatch from .updates import ( @@ -26,7 +26,7 @@ Histogram, FullRank, MeanField, - sample_vp + sample_approx ) from . import approximations diff --git a/pymc3/variational/advi.py b/pymc3/variational/advi.py index f2627dfa3c..fffdc1814c 100644 --- a/pymc3/variational/advi.py +++ b/pymc3/variational/advi.py @@ -109,8 +109,8 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False, Inference. arXiv preprint arXiv:1603.00788. """ import warnings - warnings.warn('Old ADVI interface is deprecated and will ' - 'be removed in future, use pm.ADVI instead', + warnings.warn('Old ADVI interface and sample_vp is deprecated and will ' + 'be removed in future, use pm.fit and pm.sample_approx instead', DeprecationWarning, stacklevel=2) model = pm.modelcontext(model) if start is None: @@ -361,6 +361,10 @@ def sample_vp( trace : pymc3.backends.base.MultiTrace Samples drawn from the variational posterior. 
""" + import warnings + warnings.warn('Old ADVI interface and sample_vp is deprecated and will ' + 'be removed in future, use pm.fit and pm.sample_approx instead', + DeprecationWarning, stacklevel=2) model = pm.modelcontext(model) if isinstance(vparams, ADVIFit): diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index f632e5cbc9..22129e643d 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -359,7 +359,7 @@ def from_noise(cls, size, jitter=.01, local_rv=None, start=None, model=None): return hist -def sample_vp(approx, draws=100, hide_transformed=False, **kwargs): +def sample_approx(approx, draws=100, hide_transformed=False): """ Draw samples from variational posterior. @@ -376,20 +376,6 @@ def sample_vp(approx, draws=100, hide_transformed=False, **kwargs): trace : pymc3.backends.base.MultiTrace Samples drawn from variational posterior. """ - if approx.__class__.__name__ == 'ADVIFit': - import warnings - warnings.warn('Old ADVI interface is deprecated and will be removed in future', - DeprecationWarning, stacklevel=2) - _approx = approx - model = kwargs.get('model') - local_rv = kwargs.get('local_RVs') - approx = MeanField(model=model, local_rv=local_rv) - bij = DictToArrayBijection(approx.order, {}) - means = bij.map(_approx.means) - stds = bij.map(_approx.stds) - rhos = np.log(np.exp(stds) - 1) - approx.mean.set_value(means.astype(approx.mean.dtype)) - approx.rho.set_value(rhos.astype(approx.rho.dtype)) if not isinstance(approx, Approximation): raise TypeError('Need Approximation instance, got %r' % approx) - return approx.sample_vp(draws=draws, hide_transformed=hide_transformed) + return approx.sample(draws=draws, hide_transformed=hide_transformed) diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index ecf60a5ab5..2fe70357d2 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -785,7 +785,7 @@ def inner(size=None, no_rand=False): return inner - def sample_vp(self, draws=1, hide_transformed=False): + def sample(self, draws=1, hide_transformed=False): """ Draw samples from variational posterior. From 67fe0719d3edee676d355b76314af2a1b64a07f3 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 14 Apr 2017 22:23:19 +0300 Subject: [PATCH 11/28] unused import --- pymc3/variational/approximations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 22129e643d..eddc15b44f 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -3,7 +3,6 @@ from theano import tensor as tt import pymc3 as pm -from pymc3 import DictToArrayBijection from pymc3.distributions.dist_math import rho2sd, log_normal, log_normal_mv from pymc3.variational.opvi import Approximation from pymc3.theanof import tt_rng, memoize From e9ce5998c95275572325d792f0ff4f543b7900cc Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Sat, 15 Apr 2017 00:12:28 +0300 Subject: [PATCH 12/28] allow seed kwarg --- pymc3/theanof.py | 15 ++++++++++++--- pymc3/variational/approximations.py | 4 ++-- pymc3/variational/opvi.py | 23 ++++++++++++++++++++--- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/pymc3/theanof.py b/pymc3/theanof.py index 3d4c9fdd08..10c5efcce6 100644 --- a/pymc3/theanof.py +++ b/pymc3/theanof.py @@ -372,9 +372,15 @@ def launch_rng(rng): launch_rng(_tt_rng) -def tt_rng(): +def tt_rng(seed=None): """ - Get the package-level random number generator. 
+ Get the package-level random number generator or new with specified seed. + + Parameters + ---------- + seed : int + If not None + returns *new* theano random generator without replacing package global one Returns ------- @@ -382,7 +388,10 @@ def tt_rng(): `theano.sandbox.rng_mrg.MRG_RandomStreams` instance passed to the most recent call of `set_tt_rng` """ - return _tt_rng + if seed is None: + return _tt_rng + else: + return MRG_RandomStreams(seed) def set_tt_rng(new_rng): diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index eddc15b44f..853e68c3d0 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -5,7 +5,7 @@ import pymc3 as pm from pymc3.distributions.dist_math import rho2sd, log_normal, log_normal_mv from pymc3.variational.opvi import Approximation -from pymc3.theanof import tt_rng, memoize +from pymc3.theanof import memoize __all__ = [ @@ -278,7 +278,7 @@ def randidx(self, size=None): pass else: size = tuple(np.atleast_1d(size)) - return (tt_rng() + return (self._rng .uniform(size=size, low=0.0, high=self.histogram.shape[0] - 1e-16) .astype('int64')) diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index 2fe70357d2..28ff46b3a9 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -431,6 +431,10 @@ class Approximation(object): See (Sticking the Landing; Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016) for details + seed : None or int + leave None to use package global RandomStream or other + valid value to create instance specific one + Subclassing ----------- Defining an approximation needs @@ -470,8 +474,10 @@ class Approximation(object): initial_dist_name = 'normal' initial_dist_map = 0. - def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1, **kwargs): + def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1, seed=None, **kwargs): model = modelcontext(model) + self._seed = seed + self._rng = tt_rng(seed) self.model = model self.check_model(model, **kwargs) if local_rv is None: @@ -496,6 +502,17 @@ def get_transformed(v): self._setup(**kwargs) self.shared_params = self.create_shared_params(**kwargs) + def seed(self, seed=None): + """ + Reinitialize RandomStream used by this approximation + + Parameters + ---------- + seed : int + """ + self._seed = seed + self._rng.seed(seed) + @property def normalizing_constant(self): t = self.to_flat_input(tt.max([v.scaling for v in self.model.basic_RVs])) @@ -678,7 +695,7 @@ def initial(self, size, no_rand=False, l=None): shape = tt.stack(*shape) if theano_condition_is_here: no_rand = tt.as_tensor(no_rand) - sample = getattr(tt_rng(), self.initial_dist_name)(shape) + sample = getattr(self._rng, self.initial_dist_name)(shape) space = tt.switch( no_rand, tt.ones_like(sample) * self.initial_dist_map, @@ -688,7 +705,7 @@ def initial(self, size, no_rand=False, l=None): if no_rand: return tt.ones(shape) * self.initial_dist_map else: - return getattr(tt_rng(), self.initial_dist_name)(shape) + return getattr(self._rng, self.initial_dist_name)(shape) return space def random_local(self, size=None, no_rand=False): From 21c16153ecd473a027df2af1e9a4fd3c71810e1a Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Sat, 15 Apr 2017 01:40:10 +0300 Subject: [PATCH 13/28] add docks, additional callbacks --- pymc3/sampling.py | 10 +++++-- pymc3/tests/test_variational_inference.py | 11 ++++++-- pymc3/variational/approximations.py | 25 +++++++++++++---- pymc3/variational/callbacks.py | 32 +++++++++++++++++++-- 
pymc3/variational/inference.py | 34 ++++++++++++++++------- 5 files changed, 88 insertions(+), 24 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index a0f3530d9b..785596ec7d 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -568,6 +568,7 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, if init == 'advi': approx = pm.fit( + seed=random_seed, n=n_init, method='advi', model=model, callbacks=[pm.callbacks.CheckLossConvergence()] ) # type: pm.MeanField @@ -578,9 +579,12 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, elif init == 'advi_map': start = pm.find_MAP() approx = pm.MeanField(model=model, start=start) - pm.fit(n=n_init, method=pm.ADVI.from_mean_field(approx), - callbacks=[pm.callbacks.CheckLossConvergence()]) - start = approx.sample(draws=n_init) + pm.fit( + seed=random_seed, + n=n_init, method=pm.ADVI.from_mean_field(approx), + callbacks=[pm.callbacks.CheckLossConvergence()] + ) + start = approx.sample(draws=njobs) cov = approx.cov.eval() if njobs == 1: start = start[0] diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index b513437505..6d0434dae4 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -147,7 +147,8 @@ def test_optimizer_with_full_data(self): inf.fit(10) approx = inf.fit(self.NITER, obj_optimizer=self.optimizer, - callbacks=[pm.callbacks.CheckLossConvergence()]) + callbacks= + [pm.callbacks.CheckLossConvergence1()]) trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1) np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4) @@ -174,7 +175,9 @@ def create_minibatch(data): mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0) Normal('x', mu=mu_, sd=sd, observed=minibatches, total_size=n) inf = self.inference() - approx = inf.fit(self.NITER * 3, obj_optimizer=self.optimizer) + approx = inf.fit(self.NITER * 3, obj_optimizer=self.optimizer, + callbacks= + [pm.callbacks.CheckLossConvergence1()]) trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1) np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4) @@ -205,7 +208,9 @@ def cb(*_): mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0) Normal('x', mu=mu_, sd=sd, observed=data_t, total_size=n) inf = self.inference() - approx = inf.fit(self.NITER * 3, callbacks=[cb], obj_n_mc=10, obj_optimizer=self.optimizer) + approx = inf.fit(self.NITER * 3, callbacks= + [cb, pm.callbacks.CheckLossConvergence1()], + obj_n_mc=10, obj_optimizer=self.optimizer) trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4) np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. 
/ d), rtol=0.4) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 853e68c3d0..35df6a88d3 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -42,6 +42,10 @@ class MeanField(Approximation): See (Sticking the Landing; Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016) for details + seed : None or int + leave None to use package global RandomStream or other + valid value to create instance specific one + References ---------- Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016 @@ -113,16 +117,21 @@ class FullRank(Approximation): See (Sticking the Landing; Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016) for details + seed : None or int + leave None to use package global RandomStream or other + valid value to create instance specific one + References ---------- Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016 Sticking the Landing: A Simple Reduced-Variance Gradient for ADVI approximateinference.org/accepted/RoederEtAl2016.pdf """ - def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1, gpu_compat=False): + def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1, gpu_compat=False, seed=None): super(FullRank, self).__init__( local_rv=local_rv, model=model, - cost_part_grad_scale=cost_part_grad_scale + cost_part_grad_scale=cost_part_grad_scale, + seed=seed ) self.gpu_compat = gpu_compat @@ -239,6 +248,10 @@ class Histogram(Approximation): model : PyMC3 model + seed : None or int + leave None to use package global RandomStream or other + valid value to create instance specific one + Usage ----- >>> with model: @@ -246,8 +259,8 @@ class Histogram(Approximation): ... trace = sample(1000, step=step) ... histogram = Histogram(trace[100:]) """ - def __init__(self, trace, local_rv=None, model=None): - super(Histogram, self).__init__(local_rv=local_rv, model=model, trace=trace) + def __init__(self, trace, local_rv=None, model=None, seed=None): + super(Histogram, self).__init__(local_rv=local_rv, model=model, trace=trace, seed=seed) def check_model(self, model, **kwargs): trace = kwargs.get('trace') @@ -327,7 +340,7 @@ def cov(self): return x.T.dot(x) / self.histogram.shape[0] @classmethod - def from_noise(cls, size, jitter=.01, local_rv=None, start=None, model=None): + def from_noise(cls, size, jitter=.01, local_rv=None, start=None, model=None, seed=None): """ Initialize Histogram with random noise @@ -347,7 +360,7 @@ def from_noise(cls, size, jitter=.01, local_rv=None, start=None, model=None): ------- Histogram """ - hist = cls(None, local_rv=local_rv, model=model) + hist = cls(None, local_rv=local_rv, model=model, seed=seed) if start is None: start = hist.model.test_point start = hist.gbij.map(start) diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py index fb5c61c402..967249d73d 100644 --- a/pymc3/variational/callbacks.py +++ b/pymc3/variational/callbacks.py @@ -1,4 +1,11 @@ import scipy.stats as stats +import numpy as np + +__all__ = [ + 'Callback', + 'CheckLossConvergence1', + 'CheckLossConvergence2' +] class Callback(object): @@ -6,8 +13,8 @@ def __call__(self, approx, loss, i): raise NotImplementedError -class CheckLossConvergence(Callback): - def __init__(self, every=100, window_size=1000, tolerance=1e-3): +class CheckLossConvergence1(Callback): + def __init__(self, every=100, window_size=2000, tolerance=1e-3): """ Parameters @@ -36,3 +43,24 @@ def __call__(self, approx, hist, i): # 1 - confidence is lower allowed p if p < self.critical: raise StopIteration + + 
+class CheckLossConvergence2(Callback): + def __init__(self, every=100, tolerance=1e-2, steps=None): + self.steps = steps + self.every = every + self.tolerance = tolerance + + def __call__(self, approx, hist, i): + if hist is None or i < self.every or i % self.every: + return + if self.steps is None: + window = int(max(0.1 * hist.size // self.every, 2.0)) + else: + window = int(max(0.1 * self.steps // self.every, 2.0)) + losses = hist[::self.every][-window:] + diff = np.abs((losses[1:]-losses[:-1])/losses[:-1]) + mean = np.mean(diff) + med = np.median(diff) + if mean < self.tolerance or med < self.tolerance: + raise StopIteration diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 929998cbb3..85073a1de5 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -184,6 +184,9 @@ class ADVI(Inference): 1 at the start and 0 in the end. So slow decay will be ok. See (Sticking the Landing; Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016) for details + seed : None or int + leave None to use package global RandomStream or other + valid value to create instance specific one References ---------- @@ -198,10 +201,10 @@ class ADVI(Inference): - Kingma, D. P., & Welling, M. (2014). Auto-Encoding Variational Bayes. stat, 1050, 1. """ - def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1): + def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1, seed=None): super(ADVI, self).__init__( KL, MeanField, None, - local_rv=local_rv, model=model, cost_part_grad_scale=cost_part_grad_scale) + local_rv=local_rv, model=model, cost_part_grad_scale=cost_part_grad_scale, seed=seed) @classmethod def from_mean_field(cls, mean_field): @@ -246,6 +249,10 @@ class FullRankADVI(Inference): See (Sticking the Landing; Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016) for details + seed : None or int + leave None to use package global RandomStream or other + valid value to create instance specific one + References ---------- - Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A., @@ -259,10 +266,11 @@ class FullRankADVI(Inference): - Kingma, D. P., & Welling, M. (2014). Auto-Encoding Variational Bayes. stat, 1050, 1. 
""" - def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1, gpu_compat=False): + def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1, gpu_compat=False, seed=None): super(FullRankADVI, self).__init__( KL, FullRank, None, - local_rv=local_rv, model=model, cost_part_grad_scale=cost_part_grad_scale, gpu_compat=gpu_compat) + local_rv=local_rv, model=model, cost_part_grad_scale=cost_part_grad_scale, + gpu_compat=gpu_compat, seed=seed) @classmethod def from_full_rank(cls, full_rank): @@ -366,6 +374,9 @@ class SVGD(Inference): initial point for inference histogram : Histogram initialize SVGD with given Histogram instead of default initial particles + seed : None or int + leave None to use package global RandomStream or other + valid value to create instance specific one References ---------- @@ -374,17 +385,17 @@ class SVGD(Inference): arXiv:1608.04471 """ def __init__(self, n_particles=100, jitter=.01, model=None, kernel=test_functions.rbf, - start=None, histogram=None, local_rv=None): + start=None, histogram=None, seed=None, local_rv=None): if histogram is None: histogram = Histogram.from_noise( - n_particles, jitter=jitter, start=start, model=model, local_rv=local_rv) + n_particles, jitter=jitter, start=start, model=model, local_rv=local_rv, seed=seed) super(SVGD, self).__init__( KSD, histogram, kernel, - model=model) + model=model, seed=seed) -def fit(n=10000, local_rv=None, method='advi', model=None, **kwargs): +def fit(n=10000, local_rv=None, method='advi', model=None, seed=None, **kwargs): """ Handy shortcut for using inference methods in functional way @@ -402,6 +413,9 @@ def fit(n=10000, local_rv=None, method='advi', model=None, **kwargs): kwargs : kwargs for Inference.fit frac : float if method is 'advi->fullrank_advi' represents advi fraction when training + seed : None or int + leave None to use package global RandomStream or other + valid value to create instance specific one Returns ------- @@ -420,7 +434,7 @@ def fit(n=10000, local_rv=None, method='advi', model=None, **kwargs): raise ValueError('frac should be in (0, 1)') n1 = int(n * frac) n2 = n-n1 - inference = ADVI(local_rv=local_rv, model=model) + inference = ADVI(local_rv=local_rv, model=model, seed=seed) logger.info('fitting advi ...') inference.fit(n1, **kwargs) inference = FullRankADVI.from_advi(inference) @@ -430,7 +444,7 @@ def fit(n=10000, local_rv=None, method='advi', model=None, **kwargs): elif isinstance(method, str): try: inference = _select[method.lower()]( - local_rv=local_rv, model=model + local_rv=local_rv, model=model, seed=seed ) except KeyError: raise KeyError('method should be one of %s ' From d493caa1278c158b78aa02c8f23d4f56c311f975 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Sat, 15 Apr 2017 02:16:54 +0300 Subject: [PATCH 14/28] change callback --- pymc3/tests/test_variational_inference.py | 6 +- pymc3/variational/callbacks.py | 67 ++++++----------------- 2 files changed, 21 insertions(+), 52 deletions(-) diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index 6d0434dae4..9e3e4cacb6 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -148,7 +148,7 @@ def test_optimizer_with_full_data(self): approx = inf.fit(self.NITER, obj_optimizer=self.optimizer, callbacks= - [pm.callbacks.CheckLossConvergence1()]) + [pm.callbacks.CheckParametersConvergence()]) trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1) 
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4) @@ -177,7 +177,7 @@ def create_minibatch(data): inf = self.inference() approx = inf.fit(self.NITER * 3, obj_optimizer=self.optimizer, callbacks= - [pm.callbacks.CheckLossConvergence1()]) + [pm.callbacks.CheckParametersConvergence()]) trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1) np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4) @@ -209,7 +209,7 @@ def cb(*_): Normal('x', mu=mu_, sd=sd, observed=data_t, total_size=n) inf = self.inference() approx = inf.fit(self.NITER * 3, callbacks= - [cb, pm.callbacks.CheckLossConvergence1()], + [cb, pm.callbacks.CheckParametersConvergence()], obj_n_mc=10, obj_optimizer=self.optimizer) trace = approx.sample(10000) np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4) diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py index 967249d73d..96e4d21fde 100644 --- a/pymc3/variational/callbacks.py +++ b/pymc3/variational/callbacks.py @@ -1,10 +1,8 @@ -import scipy.stats as stats import numpy as np __all__ = [ 'Callback', - 'CheckLossConvergence1', - 'CheckLossConvergence2' + 'CheckParametersConvergence' ] @@ -13,54 +11,25 @@ def __call__(self, approx, loss, i): raise NotImplementedError -class CheckLossConvergence1(Callback): - def __init__(self, every=100, window_size=2000, tolerance=1e-3): - """ - - Parameters - ---------- - every : int - how often check convergence - window_size : - last elbos to take - tolerance : float - Error rate under null hypothesis, consider taking small values - """ - self.every = every - self.window_size = window_size - self.critical = tolerance / 2. - - def __call__(self, approx, hist, i): - if hist is None or i < self.window_size or i % self.every: - return - diff = ((hist[-self.window_size:] - hist[-self.window_size-1:-1]) - / hist[-self.window_size-1:-1]) - mean = diff.mean() - # unbiased std of mean - std = diff.std() / (self.window_size - 1)**.5 - t = abs(mean / std) - p = stats.t.cdf(t, df=self.window_size) - .5 - # 1 - confidence is lower allowed p - if p < self.critical: - raise StopIteration - - -class CheckLossConvergence2(Callback): - def __init__(self, every=100, tolerance=1e-2, steps=None): - self.steps = steps +class CheckParametersConvergence(Callback): + def __init__(self, every=1000, tolerance=1e-2): self.every = every + self.prev = None self.tolerance = tolerance - def __call__(self, approx, hist, i): - if hist is None or i < self.every or i % self.every: + def __call__(self, approx, _, i): + if self.prev is None: + self.prev = self.flatten_shared(approx.params) + if i < self.every or i % self.every: return - if self.steps is None: - window = int(max(0.1 * hist.size // self.every, 2.0)) - else: - window = int(max(0.1 * self.steps // self.every, 2.0)) - losses = hist[::self.every][-window:] - diff = np.abs((losses[1:]-losses[:-1])/losses[:-1]) - mean = np.mean(diff) - med = np.median(diff) - if mean < self.tolerance or med < self.tolerance: + current = self.flatten_shared(approx.params) + delta = (current - self.prev)/self.prev + self.prev = current + delta[np.isnan(delta)] = 0 + norm = delta.dot(delta)**.5 + if norm < self.tolerance: raise StopIteration + + @staticmethod + def flatten_shared(shared_list): + return np.concatenate([sh.get_value().flatten() for sh in shared_list]) From d4fd91423b78dc67b019d96c2e985f74757d5fab Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Sat, 15 Apr 2017 11:34:50 +0300 Subject: [PATCH 15/28] 
fix typo --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 785596ec7d..40bd0867d2 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -582,7 +582,7 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, pm.fit( seed=random_seed, n=n_init, method=pm.ADVI.from_mean_field(approx), - callbacks=[pm.callbacks.CheckLossConvergence()] + callbacks=[pm.callbacks.CheckParametersConvergence()] ) start = approx.sample(draws=njobs) cov = approx.cov.eval() From ce70f4a57baa2b229d6b6963af4d5e874c68714c Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Sat, 15 Apr 2017 11:35:22 +0300 Subject: [PATCH 16/28] check nan --- pymc3/variational/inference.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 85073a1de5..ebc000a099 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -153,8 +153,10 @@ def fit(self, n=10000, score=None, callbacks=None, else: # pragma: no cover scores = np.asarray(()) try: - for _ in progress: + for i in progress: step_func() + if np.isnan(self.approx.params[0].get_value()).any(): + raise FloatingPointError('NaN occurred in optimization.') for callback in callbacks: callback(self.approx, None, i) except (KeyboardInterrupt, StopIteration): From 5e5756af29e4231f26fe08186c633a0c73fcd71b Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 19 Apr 2017 20:41:23 +0300 Subject: [PATCH 17/28] refactor callback --- pymc3/sampling.py | 4 ++-- pymc3/variational/callbacks.py | 12 +++++++----- pymc3/variational/inference.py | 9 ++++++--- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 40bd0867d2..2f08726027 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -570,7 +570,7 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, approx = pm.fit( seed=random_seed, n=n_init, method='advi', model=model, - callbacks=[pm.callbacks.CheckLossConvergence()] + callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)] ) # type: pm.MeanField start = approx.sample(draws=njobs) cov = approx.cov.eval() @@ -582,7 +582,7 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, pm.fit( seed=random_seed, n=n_init, method=pm.ADVI.from_mean_field(approx), - callbacks=[pm.callbacks.CheckParametersConvergence()] + callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)] ) start = approx.sample(draws=njobs) cov = approx.cov.eval() diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py index 96e4d21fde..e48d55dfe0 100644 --- a/pymc3/variational/callbacks.py +++ b/pymc3/variational/callbacks.py @@ -12,10 +12,11 @@ def __call__(self, approx, loss, i): class CheckParametersConvergence(Callback): - def __init__(self, every=1000, tolerance=1e-2): + def __init__(self, every=1000, tolerance=1e-3, eps=1e-10): self.every = every self.prev = None self.tolerance = tolerance + self.eps = np.float32(eps) def __call__(self, approx, _, i): if self.prev is None: @@ -23,12 +24,13 @@ def __call__(self, approx, _, i): if i < self.every or i % self.every: return current = self.flatten_shared(approx.params) - delta = (current - self.prev)/self.prev + prev = self.prev + eps = self.eps + delta = (np.abs(current - prev)+eps)/(np.abs(prev)+eps) self.prev = current - delta[np.isnan(delta)] = 0 - norm = delta.dot(delta)**.5 + norm = delta.max() if norm < self.tolerance: - raise StopIteration + raise 
StopIteration('Convergence archived') @staticmethod def flatten_shared(shared_list): diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index ebc000a099..308bc8948c 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -131,10 +131,12 @@ def fit(self, n=10000, score=None, callbacks=None, progress.set_description('Average Loss = {:,.5g}'.format(avg_loss)) for callback in callbacks: callback(self.approx, scores[:i+1], i) - except (KeyboardInterrupt, StopIteration): # pragma: no cover + except (KeyboardInterrupt, StopIteration) as e: # do not print log on the same line progress.close() scores = scores[:i] + if isinstance(e, StopIteration): + logger.info(str(e)) if n < 10: logger.info('Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}'.format( i, 100 * i // n, scores[i])) @@ -159,8 +161,9 @@ def fit(self, n=10000, score=None, callbacks=None, raise FloatingPointError('NaN occurred in optimization.') for callback in callbacks: callback(self.approx, None, i) - except (KeyboardInterrupt, StopIteration): - pass + except (KeyboardInterrupt, StopIteration) as e: + if isinstance(e, StopIteration): + logger.info(str(e)) finally: progress.close() self.hist = np.concatenate([self.hist, scores]) From 9c61bf2535e3b8d1b6a38428c540b4166d84f18e Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Wed, 19 Apr 2017 23:57:23 +0300 Subject: [PATCH 18/28] fix pylint, good catch --- pymc3/variational/approximations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 35df6a88d3..5a6dce2963 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -12,7 +12,7 @@ 'MeanField', 'FullRank', 'Histogram', - 'sample_vp' + 'sample_approx' ] From daa9ee0f779149fadf82eda6ba680a5211f04bdc Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 20 Apr 2017 00:06:12 +0300 Subject: [PATCH 19/28] Histogram -> Empirical (API change!) 
Following discussion in #1953 CC @twiecki, @fonnesbeck, @aseyboldt, @jsalvatier, @taku-y, @springcoil --- pymc3/tests/test_variational_inference.py | 34 +++++++++++------------ pymc3/variational/__init__.py | 2 +- pymc3/variational/approximations.py | 12 ++++---- pymc3/variational/inference.py | 8 +++--- pymc3/variational/operators.py | 6 ++-- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index 9e3e4cacb6..2bf647e609 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -7,7 +7,7 @@ from pymc3 import Model, Normal from pymc3.variational import ( ADVI, FullRankADVI, SVGD, - Histogram, + Empirical, fit ) from pymc3.variational.operators import KL @@ -281,18 +281,18 @@ class TestSVGD(TestApproximates.Base): optimizer = functools.partial(pm.adam, learning_rate=.1) -class TestHistogram(SeededTest): +class TestEmpirical(SeededTest): def test_sampling(self): with models.multidimensional_model()[1]: full_rank = FullRankADVI() approx = full_rank.fit(20) trace0 = approx.sample(10000) - histogram = Histogram(trace0) - trace1 = histogram.sample(100000) + approx = Empirical(trace0) + trace1 = approx.sample(100000) np.testing.assert_allclose(trace0['x'].mean(0), trace1['x'].mean(0), atol=0.01) np.testing.assert_allclose(trace0['x'].var(0), trace1['x'].var(0), atol=0.01) - def test_aevb_histogram(self): + def test_aevb_empirical(self): _, model, _ = models.exponential_beta(n=2) x = model.x mu = theano.shared(x.init_value) @@ -301,10 +301,10 @@ def test_aevb_histogram(self): inference = ADVI(local_rv={x: (mu, rho)}) approx = inference.approx trace0 = approx.sample(10000) - histogram = Histogram(trace0, local_rv={x: (mu, rho)}) - trace1 = histogram.sample(10000) - histogram.random(no_rand=True) - histogram.random_fn(no_rand=True) + approx = Empirical(trace0, local_rv={x: (mu, rho)}) + trace1 = approx.sample(10000) + approx.random(no_rand=True) + approx.random_fn(no_rand=True) np.testing.assert_allclose(trace0['y'].mean(0), trace1['y'].mean(0), atol=0.02) np.testing.assert_allclose(trace0['y'].var(0), trace1['y'].var(0), atol=0.02) np.testing.assert_allclose(trace0['x'].mean(0), trace1['x'].mean(0), atol=0.02) @@ -317,17 +317,17 @@ def test_random_with_transformed(self): p = pm.Uniform('p') pm.Bernoulli('trials', p, observed=trials) trace = pm.sample(1000, step=pm.Metropolis()) - histogram = Histogram(trace) - histogram.randidx(None).eval() - histogram.randidx(1).eval() - histogram.random_fn(no_rand=True) - histogram.random_fn(no_rand=False) - histogram.histogram_logp.eval() + approx = Empirical(trace) + approx.randidx(None).eval() + approx.randidx(1).eval() + approx.random_fn(no_rand=True) + approx.random_fn(no_rand=False) + approx.histogram_logp.eval() def test_init_from_noize(self): with models.multidimensional_model()[1]: - histogram = Histogram.from_noise(100) - assert histogram.histogram.eval().shape == (100, 6) + approx = Empirical.from_noise(100) + assert approx.histogram.eval().shape == (100, 6) _model = models.simple_model()[1] with _model: diff --git a/pymc3/variational/__init__.py b/pymc3/variational/__init__.py index a61d63c8db..33d72c89fc 100644 --- a/pymc3/variational/__init__.py +++ b/pymc3/variational/__init__.py @@ -23,7 +23,7 @@ fit, ) from .approximations import ( - Histogram, + Empirical, FullRank, MeanField, sample_approx diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 
5a6dce2963..a96518e30c 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -11,7 +11,7 @@ __all__ = [ 'MeanField', 'FullRank', - 'Histogram', + 'Empirical', 'sample_approx' ] @@ -232,7 +232,7 @@ def from_mean_field(cls, mean_field, gpu_compat=False): return full_rank -class Histogram(Approximation): +class Empirical(Approximation): """ Builds Approximation instance from a given trace, it has the same interface as variational approximation @@ -241,7 +241,7 @@ class Histogram(Approximation): ---------- trace : MultiTrace local_rv : dict - Experimental for Histogram + Experimental for Histogram approximation mapping {model_variable -> local_variable} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details @@ -257,10 +257,10 @@ class Histogram(Approximation): >>> with model: ... step = NUTS() ... trace = sample(1000, step=step) - ... histogram = Histogram(trace[100:]) + ... histogram = Empirical(trace[100:]) """ def __init__(self, trace, local_rv=None, model=None, seed=None): - super(Histogram, self).__init__(local_rv=local_rv, model=model, trace=trace, seed=seed) + super(Empirical, self).__init__(local_rv=local_rv, model=model, trace=trace, seed=seed) def check_model(self, model, **kwargs): trace = kwargs.get('trace') @@ -358,7 +358,7 @@ def from_noise(cls, size, jitter=.01, local_rv=None, start=None, model=None, see Returns ------- - Histogram + Empirical """ hist = cls(None, local_rv=local_rv, model=model, seed=seed) if start is None: diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 308bc8948c..bc37130983 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -7,7 +7,7 @@ import numpy as np import pymc3 as pm -from pymc3.variational.approximations import MeanField, FullRank, Histogram +from pymc3.variational.approximations import MeanField, FullRank, Empirical from pymc3.variational.operators import KL, KSD from pymc3.variational.opvi import Approximation from pymc3.variational import test_functions @@ -377,8 +377,8 @@ class SVGD(Inference): kernel function for KSD f(histogram) -> (k(x,.), \nabla_x k(x,.)) start : dict initial point for inference - histogram : Histogram - initialize SVGD with given Histogram instead of default initial particles + histogram : Empirical + initialize SVGD with given Empirical approximation instead of default initial particles seed : None or int leave None to use package global RandomStream or other valid value to create instance specific one @@ -392,7 +392,7 @@ class SVGD(Inference): def __init__(self, n_particles=100, jitter=.01, model=None, kernel=test_functions.rbf, start=None, histogram=None, seed=None, local_rv=None): if histogram is None: - histogram = Histogram.from_noise( + histogram = Empirical.from_noise( n_particles, jitter=jitter, start=start, model=model, local_rv=local_rv, seed=seed) super(SVGD, self).__init__( KSD, histogram, diff --git a/pymc3/variational/operators.py b/pymc3/variational/operators.py index ceb4fe6633..38a3726fa1 100644 --- a/pymc3/variational/operators.py +++ b/pymc3/variational/operators.py @@ -50,7 +50,7 @@ class KSD(Operator): Parameters ---------- - approx : pm.Histogram + approx : pm.Empirical References ---------- @@ -64,8 +64,8 @@ class KSD(Operator): OBJECTIVE = KSDObjective def __init__(self, approx): - if not isinstance(approx, pm.Histogram): - raise ValueError('approx should be a Histogram, got %r' % approx) + if not isinstance(approx, pm.Empirical): + raise 
ValueError('approx should be an Empirical approximation, got %r' % approx) Operator.__init__(self, approx) def apply(self, f): From 8b2321871c9b8f19de7fd392ae388979b277118a Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 20 Apr 2017 00:35:54 +0300 Subject: [PATCH 20/28] refactor boilerplate Inference --- pymc3/variational/inference.py | 118 +++++++++++++++++---------------- 1 file changed, 62 insertions(+), 56 deletions(-) diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index bc37130983..7766468d8f 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -88,7 +88,7 @@ def run_profiling(self, n=1000, score=None, **kwargs): progress.close() return step_func.profile - def fit(self, n=10000, score=None, callbacks=None, + def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs): """ Performs Operator Variational Inference @@ -100,9 +100,9 @@ def fit(self, n=10000, score=None, callbacks=None, score : bool evaluate loss on each iteration or not callbacks : list[function : (Approximation, losses, i) -> any] - callback_every : int - call callback functions on `callback_every` step, to - interrupt inference raise `StopIteration` exception inside callback + calls provided functions after each iteration step + progressbar : bool + whether to show progressbar or not kwargs : kwargs for ObjectiveFunction.step_function Returns @@ -113,61 +113,67 @@ def fit(self, n=10000, score=None, callbacks=None, callbacks = [] score = self._maybe_score(score) step_func = self.objective.step_function(score=score, **kwargs) - i = 0 - progress = tqdm.trange(n) + progress = tqdm.trange(n, disable=not progressbar) if score: - scores = np.empty(n) - scores[:] = np.nan - try: - for i in progress: - e = step_func() - if np.isnan(e): # pragma: no cover - scores = scores[:i] - self.hist = np.concatenate([self.hist, scores]) - raise FloatingPointError('NaN occurred in optimization.') - scores[i] = e - if i % 10 == 0: - avg_loss = scores[max(0, i - 1000):i+1].mean() - progress.set_description('Average Loss = {:,.5g}'.format(avg_loss)) - for callback in callbacks: - callback(self.approx, scores[:i+1], i) - except (KeyboardInterrupt, StopIteration) as e: - # do not print log on the same line - progress.close() - scores = scores[:i] - if isinstance(e, StopIteration): - logger.info(str(e)) - if n < 10: - logger.info('Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}'.format( - i, 100 * i // n, scores[i])) - else: - avg_loss = scores[min(0, i - 1000):i+1].mean() - logger.info('Interrupted at {:,d} [{:.0f}%]: Average Loss = {:,.5g}'.format( - i, 100 * i // n, avg_loss)) + self._iterate_with_loss(n, step_func, progress, callbacks) + else: + self._iterate_without_loss(n, step_func, progress, callbacks) + return self.approx + + def _iterate_without_loss(self, _, step_func, progress, callbacks): + try: + for i in progress: + step_func() + if np.isnan(self.approx.params[0].get_value()).any(): + raise FloatingPointError('NaN occurred in optimization.') + for callback in callbacks: + callback(self.approx, None, i) + except (KeyboardInterrupt, StopIteration) as e: + progress.close() + if isinstance(e, StopIteration): + logger.info(str(e)) + finally: + progress.close() + + def _iterate_with_loss(self, n, step_func, progress, callbacks): + scores = np.empty(n) + scores[:] = np.nan + i = 0 + try: + for i in progress: + e = step_func() + if np.isnan(e): # pragma: no cover + scores = scores[:i] + self.hist = np.concatenate([self.hist, scores]) + raise 
FloatingPointError('NaN occurred in optimization.') + scores[i] = e + if i % 10 == 0: + avg_loss = scores[max(0, i - 1000):i + 1].mean() + progress.set_description('Average Loss = {:,.5g}'.format(avg_loss)) + for callback in callbacks: + callback(self.approx, scores[:i + 1], i) + except (KeyboardInterrupt, StopIteration) as e: + # do not print log on the same line + progress.close() + scores = scores[:i] + if isinstance(e, StopIteration): + logger.info(str(e)) + if n < 10: + logger.info('Interrupted at {:,d} [{:.0f}%]: Loss = {:,.5g}'.format( + i, 100 * i // n, scores[i])) else: - if n < 10: - logger.info('Finished [100%]: Loss = {:,.5g}'.format(scores[-1])) - else: - avg_loss = scores[max(0, i - 1000):i+1].mean() - logger.info('Finished [100%]: Average Loss = {:,.5g}'.format(avg_loss)) - finally: - progress.close() - else: # pragma: no cover - scores = np.asarray(()) - try: - for i in progress: - step_func() - if np.isnan(self.approx.params[0].get_value()).any(): - raise FloatingPointError('NaN occurred in optimization.') - for callback in callbacks: - callback(self.approx, None, i) - except (KeyboardInterrupt, StopIteration) as e: - if isinstance(e, StopIteration): - logger.info(str(e)) - finally: - progress.close() + avg_loss = scores[min(0, i - 1000):i + 1].mean() + logger.info('Interrupted at {:,d} [{:.0f}%]: Average Loss = {:,.5g}'.format( + i, 100 * i // n, avg_loss)) + else: + if n < 10: + logger.info('Finished [100%]: Loss = {:,.5g}'.format(scores[-1])) + else: + avg_loss = scores[max(0, i - 1000):i + 1].mean() + logger.info('Finished [100%]: Average Loss = {:,.5g}'.format(avg_loss)) + finally: + progress.close() self.hist = np.concatenate([self.hist, scores]) - return self.approx class ADVI(Inference): From 3e6311eb485c7ea7c554d7ceb9b64335e5782dda Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 20 Apr 2017 00:36:41 +0300 Subject: [PATCH 21/28] refactor callback --- pymc3/variational/callbacks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc3/variational/callbacks.py b/pymc3/variational/callbacks.py index e48d55dfe0..e3caa6013a 100644 --- a/pymc3/variational/callbacks.py +++ b/pymc3/variational/callbacks.py @@ -21,7 +21,7 @@ def __init__(self, every=1000, tolerance=1e-3, eps=1e-10): def __call__(self, approx, _, i): if self.prev is None: self.prev = self.flatten_shared(approx.params) - if i < self.every or i % self.every: + if i % self.every or i < self.every: return current = self.flatten_shared(approx.params) prev = self.prev @@ -30,7 +30,7 @@ def __call__(self, approx, _, i): self.prev = current norm = delta.max() if norm < self.tolerance: - raise StopIteration('Convergence archived') + raise StopIteration('Convergence archived at %d' % i) @staticmethod def flatten_shared(shared_list): From c7e0f405f35d1ad4ba90c0b2cf681240d7d42773 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 20 Apr 2017 00:50:39 +0300 Subject: [PATCH 22/28] add progressbar supprot for init_nuts --- pymc3/sampling.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 2f08726027..7a0e20f427 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -570,7 +570,8 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, approx = pm.fit( seed=random_seed, n=n_init, method='advi', model=model, - callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)] + callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)], + progressbar=progressbar ) # type: pm.MeanField start = 
approx.sample(draws=njobs) cov = approx.cov.eval() @@ -582,7 +583,8 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None, pm.fit( seed=random_seed, n=n_init, method=pm.ADVI.from_mean_field(approx), - callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)] + callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)], + progressbar=progressbar ) start = approx.sample(draws=njobs) cov = approx.cov.eval() From 5e750e7626f5cb4433c1e3a80f7866e73a92426f Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Thu, 20 Apr 2017 22:45:00 +0300 Subject: [PATCH 23/28] launch tt_rng before return --- pymc3/theanof.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymc3/theanof.py b/pymc3/theanof.py index 10c5efcce6..a353895ae1 100644 --- a/pymc3/theanof.py +++ b/pymc3/theanof.py @@ -391,7 +391,9 @@ def tt_rng(seed=None): if seed is None: return _tt_rng else: - return MRG_RandomStreams(seed) + ret = MRG_RandomStreams(seed) + launch_rng(ret) + return ret def set_tt_rng(new_rng): From f62fd25c06f059be25d191224d90dc1a3791be30 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 21 Apr 2017 00:00:28 +0300 Subject: [PATCH 24/28] Docs --- pymc3/variational/inference.py | 117 +++++++++++++++++++++++++++++++-- pymc3/variational/opvi.py | 71 ++++++++++++++++---- 2 files changed, 171 insertions(+), 17 deletions(-) diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 7766468d8f..04f087306e 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -179,11 +179,120 @@ def _iterate_with_loss(self, n, step_func, progress, callbacks): class ADVI(Inference): """ Automatic Differentiation Variational Inference (ADVI) + + This class implements the meanfield ADVI, where the variational + posterior distribution is assumed to be spherical Gaussian without + correlation of parameters and fit to the true posterior distribution. + The means and standard deviations of the variational posterior are referred + to as variational parameters. + + For explanation, we classify random variables in probabilistic models into + three types. Observed random variables + :math:`{\cal Y}=\{\mathbf{y}_{i}\}_{i=1}^{N}` are :math:`N` observations. + Each :math:`\mathbf{y}_{i}` can be a set of observed random variables, + i.e., :math:`\mathbf{y}_{i}=\{\mathbf{y}_{i}^{k}\}_{k=1}^{V_{o}}`, where + :math:`V_{k}` is the number of the types of observed random variables + in the model. + + The next ones are global random variables + :math:`\Theta=\{\\theta^{k}\}_{k=1}^{V_{g}}`, which are used to calculate + the probabilities for all observed samples. + + The last ones are local random variables + :math:`{\cal Z}=\{\mathbf{z}_{i}\}_{i=1}^{N}`, where + :math:`\mathbf{z}_{i}=\{\mathbf{z}_{i}^{k}\}_{k=1}^{V_{l}}`. + These RVs are used only in AEVB. + + The goal of ADVI is to approximate the posterior distribution + :math:`p(\Theta,{\cal Z}|{\cal Y})` by variational posterior + :math:`q(\Theta)\prod_{i=1}^{N}q(\mathbf{z}_{i})`. All of these terms + are normal distributions (mean-field approximation). + + :math:`q(\Theta)` is parametrized with its means and standard deviations. + These parameters are denoted as :math:`\gamma`. While :math:`\gamma` is + a constant, the parameters of :math:`q(\mathbf{z}_{i})` are dependent on + each observation. Therefore these parameters are denoted as + :math:`\\xi(\mathbf{y}_{i}; \\nu)`, where :math:`\\nu` is the parameters + of :math:`\\xi(\cdot)`. 
For example, :math:`\\xi(\cdot)` can be a + multilayer perceptron or convolutional neural network. + + In addition to :math:`\\xi(\cdot)`, we can also include deterministic + mappings for the likelihood of observations. We denote the parameters of + the deterministic mappings as :math:`\eta`. An example of such mappings is + the deconvolutional neural network used in the convolutional VAE example + in the PyMC3 notebook directory. + + This function maximizes the evidence lower bound (ELBO) + :math:`{\cal L}(\gamma, \\nu, \eta)` defined as follows: + .. math:: + + {\cal L}(\gamma,\\nu,\eta) & = + \mathbf{c}_{o}\mathbb{E}_{q(\Theta)}\left[ + \sum_{i=1}^{N}\mathbb{E}_{q(\mathbf{z}_{i})}\left[ + \log p(\mathbf{y}_{i}|\mathbf{z}_{i},\Theta,\eta) + \\right]\\right] \\\\ & + - \mathbf{c}_{g}KL\left[q(\Theta)||p(\Theta)\\right] + - \mathbf{c}_{l}\sum_{i=1}^{N} + KL\left[q(\mathbf{z}_{i})||p(\mathbf{z}_{i})\\right], + + where :math:`KL[q(v)||p(v)]` is the Kullback-Leibler divergence + + .. math:: + + KL[q(v)||p(v)] = \int q(v)\log\\frac{q(v)}{p(v)}dv, + + :math:`\mathbf{c}_{o/g/l}` are vectors for weighting each term of ELBO. + More precisely, we can write each of the terms in ELBO as follows: + + .. math:: + + \mathbf{c}_{o}\log p(\mathbf{y}_{i}|\mathbf{z}_{i},\Theta,\eta) & = & + \sum_{k=1}^{V_{o}}c_{o}^{k} + \log p(\mathbf{y}_{i}^{k}| + {\\rm pa}(\mathbf{y}_{i}^{k},\Theta,\eta)) \\\\ + \mathbf{c}_{g}KL\left[q(\Theta)||p(\Theta)\\right] & = & + \sum_{k=1}^{V_{g}}c_{g}^{k}KL\left[ + q(\\theta^{k})||p(\\theta^{k}|{\\rm pa(\\theta^{k})})\\right] \\\\ + \mathbf{c}_{l}KL\left[q(\mathbf{z}_{i}||p(\mathbf{z}_{i})\\right] & = & + \sum_{k=1}^{V_{l}}c_{l}^{k}KL\left[ + q(\mathbf{z}_{i}^{k})|| + p(\mathbf{z}_{i}^{k}|{\\rm pa}(\mathbf{z}_{i}^{k}))\\right], + + where :math:`{\\rm pa}(v)` denotes the set of parent variables of :math:`v` + in the directed acyclic graph of the model. + + When using mini-batches, :math:`c_{o}^{k}` and :math:`c_{l}^{k}` should be + set to :math:`N/M`, where :math:`M` is the number of observations in each + mini-batch. This is done with supplying :code:`total_size` parameter to + observed nodes (e.g. :code:`Normal('x', 0, 1, observed=data, total_size=10000)`). + In this case it is possible to automatically determine appropriate scaling for :math:`logp` + of observed nodes. Interesting to note that it is possible to have two independent + observed variables with different :code:`total_size` and iterate them independently + during inference. + + For working with ADVI, we need to give + - The probabilistic model + (:code:`model`), the three types of RVs (:code:`observed_RVs`, + :code:`global_RVs` and :code:`local_RVs`). + + - (optional) Minibatches + The tensors to which mini-bathced samples are supplied are + handled separately by using callbacks in :code:`.fit` method + that change storage of shared theano variable or by :code:`pm.generator` + that automatically iterates over minibatches and defined beforehand. + + - (optional) Parameters of deterministic mappings + They have to be passed along with other params to :code:`.fit` method + as :code:`more_obj_params` argument. 
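(Editor's note: the minibatch workflow described in the docstring above can be illustrated with the following sketch. It is not part of the patch; it only uses the :code:`pm.generator`, :code:`total_size`, :code:`pm.fit` and :code:`approx.sample` interfaces referenced in this PR, and exact argument names may differ in the released API.)

```python
# Hedged sketch of minibatch mean-field ADVI: an endless generator feeds
# random minibatches, and total_size rescales the minibatch likelihood
# term back to the full dataset size, as described in the docstring above.
import numpy as np
import pymc3 as pm

data = np.random.randn(10000)

def minibatches(batch_size=100):
    # endless stream of random minibatches, consumed via pm.generator
    while True:
        yield np.random.choice(data, size=batch_size)

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=10)
    sd = pm.HalfNormal('sd', sd=10)
    pm.Normal('obs', mu=mu, sd=sd,
              observed=pm.generator(minibatches()),
              total_size=len(data))
    approx = pm.fit(n=10000, method='advi')   # mean-field approximation
    trace = approx.sample(draws=1000)          # draws from the variational posterior
```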
+
+    For more information concerning the training stage please refer to
+    :code:`pymc3.variational.opvi.ObjectiveFunction.step_function`
+
     Parameters
     ----------
-    local_rv : dict
-        mapping {model_variable -> local_variable}
+    local_rv : dict[var->tuple]
+        mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)}
         Local Vars are used for Autoencoding Variational Bayes
         See (AEVB; Kingma and Welling, 2014) for details
@@ -246,8 +355,8 @@ class FullRankADVI(Inference):
     Parameters
     ----------
-    local_rv : dict
-        mapping {model_variable -> local_variable}
+    local_rv : dict[var->tuple]
+        mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)}
         Local Vars are used for Autoencoding Variational Bayes
         See (AEVB; Kingma and Welling, 2014) for details
diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py
index 28ff46b3a9..cb5d0cf012 100644
--- a/pymc3/variational/opvi.py
+++ b/pymc3/variational/opvi.py
@@ -1,3 +1,36 @@
+"""
+Variational inference is a great approach for doing really complex,
+often intractable Bayesian inference in approximate form. Common methods
+(e.g. ADVI) lack the flexibility needed for the approximate posterior to
+reveal the true nature of the underlying problem, and in some applications
+this can yield unreliable decisions.
+
+Recently the [OPVI](https://arxiv.org/abs/1610.09033) framework was
+presented (Ranganath et al., 2016). It generalizes variational inference
+so that the problem is built from blocks. The first and essential block is
+the Model itself. The second is the Approximation; in some cases
+:math:`log Q(D)` is not really needed. Whether it is depends on the third
+and fourth parts of that black box, the Operator and the Test Function
+respectively.
+
+The Operator encodes the approach we take: it constructs the loss from a
+given Model, Approximation and Test Function. The test function is not
+needed if we minimize the KL divergence from Q to the posterior; as a
+drawback we then need to compute :math:`log Q(D)`. Sometimes the
+approximation family is intractable and :math:`log Q(D)` is not available;
+here comes the LS (Langevin-Stein) Operator with a set of test functions.
+
+The Test Function has a less intuitive meaning. It is usually used with the
+LS operator and represents everything we want from our approximate
+distribution. For any given vector-valued function of :math:`z`, the LS
+operator yields a function with zero mean under the posterior, so
+:math:`log Q(D)` is no longer needed. That opens the door to rich
+approximation families such as neural networks.
+
+References
+----------
+- Rajesh Ranganath, Jaan Altosaar, Dustin Tran, David M.
Blei + Operator Variational Inference + https://arxiv.org/abs/1610.09033 (2016) +""" + import warnings import numpy as np import theano @@ -251,7 +284,7 @@ class Operator(object): Subclassing ----------- - For implementing Custom operator it is needed to define `.apply(f)` method + For implementing Custom operator it is needed to define :code:`.apply(f)` method """ HAS_TEST_FUNCTION = False @@ -417,8 +450,8 @@ class Approximation(object): Parameters ---------- - local_rv : dict - mapping {model_variable -> local_variable} + local_rv : dict[var->tuple] + mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details @@ -439,37 +472,49 @@ class Approximation(object): ----------- Defining an approximation needs custom implementation of the following methods: - - `.create_shared_params()` + - :code:`.create_shared_params(**kwargs)` Returns {dict|list|theano.shared} - - `.random_global(size=None, no_rand=False)` + - :code:`.random_global(size=None, no_rand=False)` Generate samples from posterior. If `no_rand==False`: sample from MAP of initial distribution. Returns TensorVariable - - `.log_q_W_global(z)` + - :code:`.log_q_W_global(z)` It is needed only if used with operator that requires :math:`logq` of an approximation Returns Scalar + + You can also override the following methods: + - :code:`._setup(**kwargs)` + Do some specific stuff having :code:`kwargs` before calling :code:`.create_shared_params` + + - :code:`.check_model(model, **kwargs)` + Do some specific check for model having :code:`kwargs` Notes ----- - There are some defaults for approximation classes that can be + :code:`kwargs` mentioned above are supplied as additional arguments + for :code:`Approximation.__init__` + + There are some defaults class attributes for approximation classes that can be optionally overriden. - - `initial_dist_name` + - :code:`initial_dist_name` string that represents name of the initial distribution. In most cases if will be `uniform` or `normal` - - `initial_dist_map` - float where initial distribution has maximum density + - :code:`initial_dist_map` + float where initial distribution has maximum density + + References ---------- - - Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016 + - Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016 Sticking the Landing: A Simple Reduced-Variance Gradient for ADVI approximateinference.org/accepted/RoederEtAl2016.pdf - - Kingma, D. P., & Welling, M. (2014). - Auto-Encoding Variational Bayes. stat, 1050, 1. + - Kingma, D. P., & Welling, M. (2014). + Auto-Encoding Variational Bayes. stat, 1050, 1. """ initial_dist_name = 'normal' initial_dist_map = 0. From 1d549f615c58e230bae74342b8956833ea650771 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 21 Apr 2017 00:13:00 +0300 Subject: [PATCH 25/28] add Inference to api reference --- docs/source/api/inference.rst | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/docs/source/api/inference.rst b/docs/source/api/inference.rst index efbefb82dd..c6f0eeb8c8 100644 --- a/docs/source/api/inference.rst +++ b/docs/source/api/inference.rst @@ -50,26 +50,34 @@ Hamiltonian Monte Carlo Variational ----------- -ADVI +OPVI ^^^^ -.. currentmodule:: pymc3.variational.advi +.. currentmodule:: pymc3.variational.opvi -.. automodule:: pymc3.variational.advi +.. automodule:: pymc3.variational.opvi :members: -ADVI minibatch -^^^^^^^^^^^^^^ +Inference +^^^^^^^^^ -.. 
currentmodule:: pymc3.variational.advi_minibatch +.. currentmodule:: pymc3.variational.inference -.. automodule:: pymc3.variational.advi_minibatch +.. automodule:: pymc3.variational.inference :members: -ADVI approximations -^^^^^^^^^^^^^^^^^^^ +Approximations +^^^^^^^^^^^^^^ .. currentmodule:: pymc3.variational.approximations .. automodule:: pymc3.variational.approximations :members: + +Operators +^^^^^^^^^ + +.. currentmodule:: pymc3.variational.operators + +.. automodule:: pymc3.variational.operators + :members: From bc56865c1a96e30213029bc99f1413124cde3e28 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 21 Apr 2017 00:13:57 +0300 Subject: [PATCH 26/28] fix typo in doc --- docs/source/api/data.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/api/data.rst b/docs/source/api/data.rst index bf5b65d898..8febe13e63 100644 --- a/docs/source/api/data.rst +++ b/docs/source/api/data.rst @@ -1,6 +1,6 @@ -***** +**** Data -***** +**** .. currentmodule:: pymc3.data From e80893900374b2ea6b0a72ac38b3e6f0ddc3a768 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 21 Apr 2017 00:30:10 +0300 Subject: [PATCH 27/28] make approximation docs more verbose --- pymc3/variational/approximations.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index a96518e30c..fdf0563972 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -25,8 +25,8 @@ class MeanField(Approximation): Parameters ---------- - local_rv : dict - mapping {model_variable -> local_variable} + local_rv : dict[var->tuple] + mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details @@ -100,8 +100,8 @@ class FullRank(Approximation): Parameters ---------- - local_rv : dict - mapping {model_variable -> local_variable} + local_rv : dict[var->tuple] + mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details @@ -240,9 +240,9 @@ class Empirical(Approximation): Parameters ---------- trace : MultiTrace - local_rv : dict - Experimental for Histogram approximation - mapping {model_variable -> local_variable} + local_rv : dict[var->tuple] + Experimental for Empirical Distribution + mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details From 8f99a2c3ac4c0a9290ce94a1fa6e884c0ab5a803 Mon Sep 17 00:00:00 2001 From: Maxim Kochurov Date: Fri, 21 Apr 2017 21:09:31 +0300 Subject: [PATCH 28/28] fix typos --- pymc3/variational/approximations.py | 8 ++++---- pymc3/variational/inference.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index fdf0563972..98dd2e9e4c 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -26,7 +26,7 @@ class MeanField(Approximation): Parameters ---------- local_rv : dict[var->tuple] - mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} + mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details @@ -101,7 +101,7 @@ class 
FullRank(Approximation): Parameters ---------- local_rv : dict[var->tuple] - mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} + mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details @@ -241,8 +241,8 @@ class Empirical(Approximation): ---------- trace : MultiTrace local_rv : dict[var->tuple] - Experimental for Empirical Distribution - mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} + Experimental for Empirical Approximation + mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 04f087306e..9b6dcafeaf 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -292,7 +292,7 @@ class ADVI(Inference): Parameters ---------- local_rv : dict[var->tuple] - mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} + mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details @@ -356,7 +356,7 @@ class FullRankADVI(Inference): Parameters ---------- local_rv : dict[var->tuple] - mapping {model_variable -> local_variable (:math:`\\mu`, math:`\\rho`)} + mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details @@ -523,8 +523,8 @@ def fit(n=10000, local_rv=None, method='advi', model=None, seed=None, **kwargs): ---------- n : int number of iterations - local_rv : dict - mapping {model_variable -> local_variable} + local_rv : dict[var->tuple] + mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)} Local Vars are used for Autoencoding Variational Bayes See (AEVB; Kingma and Welling, 2014) for details method : str or Inference
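(Editor's note: to round off the series, here is a hedged, illustrative sketch of the top-level workflow these patches converge on — :code:`pm.fit` with a parameter-convergence callback, sampling from the fitted approximation, and wrapping a trace as an :code:`Empirical` approximation. It is not part of the patches; argument names follow the docstrings and diffs above and may differ in the released API.)

```python
# Illustrative end-to-end usage of the refactored variational API.
import numpy as np
import pymc3 as pm

data = np.random.randn(1000)

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    pm.Normal('obs', mu=mu, sd=1, observed=data)

    # Stop early once the variational parameters stop moving
    # (CheckParametersConvergence raises StopIteration, which fit() logs).
    approx = pm.fit(
        n=50000, method='advi',
        callbacks=[pm.callbacks.CheckParametersConvergence(every=1000, tolerance=1e-3)],
    )
    trace = approx.sample(draws=2000)   # draws from the variational posterior

    # A MultiTrace can itself be wrapped as an Empirical approximation,
    # e.g. to provide initial particles for SVGD.
    emp = pm.Empirical(trace)
```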