Sample vp for Approximations, deprecate old ADVI #2027

Merged: 28 commits, Apr 22, 2017
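In short, this PR unifies posterior sampling across approximations: `Approximation.sample_vp` becomes `Approximation.sample`, the `Histogram` approximation is renamed to `Empirical`, and the old function-style `advi`/`advi_minibatch`/`sample_vp` interface is deprecated in favor of `pm.fit`. A minimal before/after sketch with a made-up one-parameter model (the model and numbers here are illustrative, not from the PR):

```python
import numpy as np
import pymc3 as pm

data = np.random.randn(100)

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    pm.Normal('obs', mu=mu, sd=1, observed=data)

    # Old, now deprecated (emits DeprecationWarning):
    #   v_params = pm.variational.advi(n=10000)
    #   trace = pm.variational.sample_vp(v_params, 500)

    # New interface introduced here:
    approx = pm.fit(n=10000, method='advi')  # returns a MeanField approximation
    trace = approx.sample(draws=500)
```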
4 changes: 2 additions & 2 deletions docs/source/api/data.rst
@@ -1,6 +1,6 @@
-*****
+****
 Data
-*****
+****
 
 .. currentmodule:: pymc3.data
26 changes: 17 additions & 9 deletions docs/source/api/inference.rst
@@ -50,26 +50,34 @@ Hamiltonian Monte Carlo
 Variational
 -----------
 
-ADVI
+OPVI
 ^^^^
 
-.. currentmodule:: pymc3.variational.advi
+.. currentmodule:: pymc3.variational.opvi
 
-.. automodule:: pymc3.variational.advi
+.. automodule:: pymc3.variational.opvi
     :members:
 
-ADVI minibatch
-^^^^^^^^^^^^^^
+Inference
+^^^^^^^^^
 
-.. currentmodule:: pymc3.variational.advi_minibatch
+.. currentmodule:: pymc3.variational.inference
 
-.. automodule:: pymc3.variational.advi_minibatch
+.. automodule:: pymc3.variational.inference
     :members:
 
-ADVI approximations
-^^^^^^^^^^^^^^^^^^^
+Approximations
+^^^^^^^^^^^^^^
 
 .. currentmodule:: pymc3.variational.approximations
 
 .. automodule:: pymc3.variational.approximations
     :members:
+
+Operators
+^^^^^^^^^
+
+.. currentmodule:: pymc3.variational.operators
+
+.. automodule:: pymc3.variational.operators
+    :members:
29 changes: 20 additions & 9 deletions pymc3/sampling.py
Expand Up @@ -24,6 +24,7 @@
STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis)


def assign_step_methods(model, step=None, methods=STEP_METHODS,
step_kwargs=None):
"""Assign model variables to appropriate step methods.
Expand Down Expand Up @@ -566,19 +567,29 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,
init = init.lower()

if init == 'advi':
v_params = pm.variational.advi(n=n_init, random_seed=random_seed,
progressbar=progressbar)
start = pm.variational.sample_vp(v_params, njobs, progressbar=False,
hide_transformed=False,
random_seed=random_seed)
approx = pm.fit(
seed=random_seed,
n=n_init, method='advi', model=model,
callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)],
progressbar=progressbar
) # type: pm.MeanField
start = approx.sample(draws=njobs)
cov = approx.cov.eval()
if njobs == 1:
start = start[0]
cov = np.power(model.dict_to_array(v_params.stds), 2)
elif init == 'advi_map':
start = pm.find_MAP()
v_params = pm.variational.advi(n=n_init, start=start,
random_seed=random_seed)
cov = np.power(model.dict_to_array(v_params.stds), 2)
approx = pm.MeanField(model=model, start=start)
Review thread on this line:

Member: I'm getting meaningful differences with this change on this model: http://twiecki.github.io/blog/2017/03/14/random-walk-deep-net/. It does not abort when converged, and sampling from this initialization is very slow.

Member (Author): Do you see any way to improve this?

Member (Author): @twiecki I was advised to try a minimum stopping criterion. It will be interesting to see how it affects performance.

+        pm.fit(
+            seed=random_seed,
+            n=n_init, method=pm.ADVI.from_mean_field(approx),
+            callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)],
+            progressbar=progressbar
+        )
+        start = approx.sample(draws=njobs)
+        cov = approx.cov.eval()
+        if njobs == 1:
+            start = start[0]
     elif init == 'map':
         start = pm.find_MAP()
         cov = pm.find_hessian(point=start)
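The slowdown discussed in the thread above is what the convergence callback targets: it lets `pm.fit` stop before exhausting all `n` iterations once the parameters stop moving. A minimal sketch of wiring up the callback, using a made-up toy model; only the `pm.fit`/`pm.callbacks` usage shown here comes from this PR:

```python
import numpy as np
import pymc3 as pm

# Illustrative data and model, not from the PR.
data = np.random.randn(1000)

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    pm.Normal('obs', mu=mu, sd=1, observed=data)

    # The callback monitors parameter changes during fitting and aborts
    # early once they fall below `tolerance`, instead of always running
    # the full n iterations.
    approx = pm.fit(
        n=100000, method='advi',
        callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)],
    )

trace = approx.sample(draws=500)
```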
65 changes: 36 additions & 29 deletions pymc3/tests/test_variational_inference.py
@@ -7,7 +7,7 @@
 from pymc3 import Model, Normal
 from pymc3.variational import (
     ADVI, FullRankADVI, SVGD,
-    Histogram,
+    Empirical,
     fit
 )
 from pymc3.variational.operators import KL
@@ -59,7 +59,7 @@ def _test_aevb(self):
         with model:
             inference = self.inference(local_rv={x: (mu, rho)})
             approx = inference.fit(3, obj_n_mc=2, obj_optimizer=self.optimizer)
-            approx.sample_vp(10)
+            approx.sample(10)
             approx.apply_replacements(
                 y,
                 more_replacements={x: np.asarray([1, 1], dtype=x.dtype)}
@@ -105,17 +105,17 @@ def test_vars_view_dynamic_size_numpy(self):
         x_sampled = app.view(app.random_fn(), 'x')
         assert x_sampled.shape == () + model['x'].dshape
 
-    def test_sample_vp(self):
+    def test_sample(self):
         n_samples = 100
         xs = np.random.binomial(n=1, p=0.2, size=n_samples)
         with pm.Model():
             p = pm.Beta('p', alpha=1, beta=1)
             pm.Binomial('xs', n=1, p=p, observed=xs)
             app = self.inference().approx
-            trace = app.sample_vp(draws=1, hide_transformed=True)
+            trace = app.sample(draws=1, hide_transformed=True)
             assert trace.varnames == ['p']
             assert len(trace) == 1
-            trace = app.sample_vp(draws=10, hide_transformed=False)
+            trace = app.sample(draws=10, hide_transformed=False)
             assert sorted(trace.varnames) == ['p', 'p_logodds_']
             assert len(trace) == 10

@@ -145,8 +145,11 @@ def test_optimizer_with_full_data(self):
             Normal('x', mu=mu_, sd=sd, observed=data)
             inf = self.inference()
             inf.fit(10)
-            approx = inf.fit(self.NITER, obj_optimizer=self.optimizer)
-            trace = approx.sample_vp(10000)
+            approx = inf.fit(self.NITER,
+                             obj_optimizer=self.optimizer,
+                             callbacks=[pm.callbacks.CheckParametersConvergence()])
+            trace = approx.sample(10000)
             np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1)
             np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)

@@ -172,8 +175,10 @@ def create_minibatch(data):
             mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
             Normal('x', mu=mu_, sd=sd, observed=minibatches, total_size=n)
             inf = self.inference()
-            approx = inf.fit(self.NITER * 3, obj_optimizer=self.optimizer)
-            trace = approx.sample_vp(10000)
+            approx = inf.fit(self.NITER * 3, obj_optimizer=self.optimizer,
+                             callbacks=[pm.callbacks.CheckParametersConvergence()])
+            trace = approx.sample(10000)
             np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1)
             np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)

@@ -203,8 +208,10 @@ def cb(*_):
             mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
             Normal('x', mu=mu_, sd=sd, observed=data_t, total_size=n)
             inf = self.inference()
-            approx = inf.fit(self.NITER * 3, callbacks=[cb], obj_n_mc=10, obj_optimizer=self.optimizer)
-            trace = approx.sample_vp(10000)
+            approx = inf.fit(self.NITER * 3,
+                             callbacks=[cb, pm.callbacks.CheckParametersConvergence()],
+                             obj_n_mc=10, obj_optimizer=self.optimizer)
+            trace = approx.sample(10000)
             np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4)
             np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)

@@ -274,30 +281,30 @@ class TestSVGD(TestApproximates.Base):
     optimizer = functools.partial(pm.adam, learning_rate=.1)
 
 
-class TestHistogram(SeededTest):
+class TestEmpirical(SeededTest):
     def test_sampling(self):
         with models.multidimensional_model()[1]:
             full_rank = FullRankADVI()
             approx = full_rank.fit(20)
-            trace0 = approx.sample_vp(10000)
-            histogram = Histogram(trace0)
-            trace1 = histogram.sample_vp(100000)
+            trace0 = approx.sample(10000)
+            approx = Empirical(trace0)
+            trace1 = approx.sample(100000)
         np.testing.assert_allclose(trace0['x'].mean(0), trace1['x'].mean(0), atol=0.01)
         np.testing.assert_allclose(trace0['x'].var(0), trace1['x'].var(0), atol=0.01)
 
-    def test_aevb_histogram(self):
+    def test_aevb_empirical(self):
         _, model, _ = models.exponential_beta(n=2)
         x = model.x
         mu = theano.shared(x.init_value)
         rho = theano.shared(np.zeros_like(x.init_value))
         with model:
             inference = ADVI(local_rv={x: (mu, rho)})
             approx = inference.approx
-            trace0 = approx.sample_vp(10000)
-            histogram = Histogram(trace0, local_rv={x: (mu, rho)})
-            trace1 = histogram.sample_vp(10000)
-            histogram.random(no_rand=True)
-            histogram.random_fn(no_rand=True)
+            trace0 = approx.sample(10000)
+            approx = Empirical(trace0, local_rv={x: (mu, rho)})
+            trace1 = approx.sample(10000)
+            approx.random(no_rand=True)
+            approx.random_fn(no_rand=True)
         np.testing.assert_allclose(trace0['y'].mean(0), trace1['y'].mean(0), atol=0.02)
         np.testing.assert_allclose(trace0['y'].var(0), trace1['y'].var(0), atol=0.02)
         np.testing.assert_allclose(trace0['x'].mean(0), trace1['x'].mean(0), atol=0.02)
@@ -310,17 +317,17 @@ def test_random_with_transformed(self):
             p = pm.Uniform('p')
             pm.Bernoulli('trials', p, observed=trials)
             trace = pm.sample(1000, step=pm.Metropolis())
-            histogram = Histogram(trace)
-            histogram.randidx(None).eval()
-            histogram.randidx(1).eval()
-            histogram.random_fn(no_rand=True)
-            histogram.random_fn(no_rand=False)
-            histogram.histogram_logp.eval()
+            approx = Empirical(trace)
+            approx.randidx(None).eval()
+            approx.randidx(1).eval()
+            approx.random_fn(no_rand=True)
+            approx.random_fn(no_rand=False)
+            approx.histogram_logp.eval()
 
     def test_init_from_noize(self):
         with models.multidimensional_model()[1]:
-            histogram = Histogram.from_noise(100)
-            assert histogram.histogram.eval().shape == (100, 6)
+            approx = Empirical.from_noise(100)
+            assert approx.histogram.eval().shape == (100, 6)
 
     _model = models.simple_model()[1]
     with _model:
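As the tests above exercise, the renamed `Empirical` class wraps a set of posterior draws (an MCMC trace) as an approximation with the same `sample` interface as the variational ones. A condensed sketch of that workflow; the model and sizes are illustrative:

```python
import numpy as np
import pymc3 as pm
from pymc3.variational import Empirical

trials = np.random.binomial(n=1, p=0.5, size=100)

with pm.Model():
    p = pm.Uniform('p')
    pm.Bernoulli('trials', p, observed=trials)
    trace = pm.sample(1000, step=pm.Metropolis())
    # Wrap the trace as an Empirical approximation (formerly Histogram)
    # and resample from it like any other approximation.
    approx = Empirical(trace)
    resampled = approx.sample(2000)
```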
17 changes: 14 additions & 3 deletions pymc3/theanof.py
@@ -372,17 +372,28 @@ def launch_rng(rng):
 launch_rng(_tt_rng)
 
 
-def tt_rng():
+def tt_rng(seed=None):
     """
-    Get the package-level random number generator.
+    Get the package-level random number generator, or a new one with the
+    specified seed.
+
+    Parameters
+    ----------
+    seed : int
+        If not None, returns a *new* theano random generator seeded with it,
+        without replacing the package-global one.
 
     Returns
     -------
-    `theano.sandbox.rng_mrg.MRG_RandomStreams` instance
+    `theano.sandbox.rng_mrg.MRG_RandomStreams`
+        either the package-global instance (as set by the most recent call
+        of `set_tt_rng`) or a newly created one if `seed` is given.
     """
-    return _tt_rng
+    if seed is None:
+        return _tt_rng
+    else:
+        ret = MRG_RandomStreams(seed)
+        launch_rng(ret)
+        return ret
 
 
 def set_tt_rng(new_rng):
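Based on the new docstring, `tt_rng` now doubles as a factory: with a seed it hands back a fresh generator and leaves the package-level one alone. A small usage sketch; variable names are illustrative:

```python
from pymc3.theanof import tt_rng

# Without a seed: the package-level generator (unchanged behaviour).
global_rng = tt_rng()

# With a seed: a *new*, independently seeded MRG_RandomStreams;
# the package-level generator is not replaced.
local_rng = tt_rng(seed=42)
noise = local_rng.normal(size=(3,))  # symbolic Theano expression
```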
6 changes: 4 additions & 2 deletions pymc3/variational/__init__.py
@@ -23,9 +23,10 @@
     fit,
 )
 from .approximations import (
-    Histogram,
+    Empirical,
     FullRank,
-    MeanField
+    MeanField,
+    sample_approx
 )
 
 from . import approximations
@@ -34,3 +35,4 @@
 from . import opvi
 from . import updates
 from . import inference
+from . import callbacks
8 changes: 8 additions & 0 deletions pymc3/variational/advi.py
@@ -108,6 +108,10 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False,
     and Blei, D. M. (2016). Automatic Differentiation Variational
     Inference. arXiv preprint arXiv:1603.00788.
     """
+    import warnings
+    warnings.warn('Old ADVI interface and sample_vp are deprecated and will '
+                  'be removed in the future; use pm.fit and pm.sample_approx instead',
+                  DeprecationWarning, stacklevel=2)
     model = pm.modelcontext(model)
     if start is None:
         start = model.test_point
@@ -357,6 +361,10 @@ def sample_vp(
     trace : pymc3.backends.base.MultiTrace
         Samples drawn from the variational posterior.
     """
+    import warnings
+    warnings.warn('Old ADVI interface and sample_vp are deprecated and will '
+                  'be removed in the future; use pm.fit and pm.sample_approx instead',
+                  DeprecationWarning, stacklevel=2)
     model = pm.modelcontext(model)
 
     if isinstance(vparams, ADVIFit):
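Besides `approx.sample`, the warning text points to `pm.sample_approx`, the module-level helper exported from `pymc3/variational/__init__.py` in this PR. A sketch of the two equivalent calls; the exact `sample_approx` signature is assumed here, mirroring `approx.sample`:

```python
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    pm.Normal('obs', mu=mu, sd=1, observed=np.random.randn(50))
    approx = pm.fit(n=10000, method='advi')

# Draw from the fitted approximation directly...
trace = approx.sample(draws=500)
# ...or via the helper that replaces the old standalone sample_vp
# (signature assumed):
trace2 = pm.sample_approx(approx, draws=500)
```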
3 changes: 3 additions & 0 deletions pymc3/variational/advi_minibatch.py
@@ -436,6 +436,9 @@ def advi_minibatch(vars=None, start=None, model=None, n=5000, n_mcsamples=1,
         Weight Uncertainty in Neural Network. In Proceedings of the 32nd
         International Conference on Machine Learning (ICML-15) (pp. 1613-1622).
     """
+    import warnings
+    warnings.warn('Old ADVI interface is deprecated and will be removed in the '
+                  'future; use pm.ADVI instead', DeprecationWarning, stacklevel=2)
     if encoder_params is None:
         encoder_params = []