@@ -271,21 +271,28 @@ class ADVI(Inference):
observed variables with different :code:`total_size` and iterate them independently
during inference.
- For working with ADVI, we need to give
+ For working with ADVI, we need to give
+
- The probabilistic model
- (:code:`model`), the three types of RVs (:code:`observed_RVs`,
+
+ :code:`model` with three types of RVs (:code:`observed_RVs`,
:code:`global_RVs` and :code:`local_RVs`).
- (optional) Minibatches
+
The tensors to which mini-batched samples are supplied are
handled separately, either by using callbacks in the :code:`.fit` method
that change the storage of a shared theano variable, or by :code:`pm.generator`,
which automatically iterates over minibatches defined beforehand.
- (optional) Parameters of deterministic mappings
+
They have to be passed along with other params to the :code:`.fit` method
as the :code:`more_obj_params` argument.
+
+ See Also
+ --------
For more information concerning the training stage, please refer to
:code:`pymc3.variational.opvi.ObjectiveFunction.step_function`
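
To make the minibatch mechanics described above concrete, here is a minimal sketch (illustrative only, not part of this changeset; the toy data, generator helper, and batch size are made up) of feeding minibatches to ADVI via :code:`pm.generator` and :code:`total_size`::

    import numpy as np
    import pymc3 as pm

    data = np.random.randn(1000)  # toy dataset, for illustration only

    def minibatches(batch_size=100):
        # endlessly yield random batches of the data
        while True:
            idx = np.random.randint(0, len(data), batch_size)
            yield data[idx]

    x_mb = pm.generator(minibatches())

    with pm.Model() as model:
        mu = pm.Normal('mu', mu=0., sd=1.)
        # total_size rescales the minibatch likelihood to the full dataset
        pm.Normal('x', mu=mu, sd=1., observed=x_mb, total_size=len(data))
        approx = pm.fit(n=10000, method='advi')
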
@@ -295,35 +302,34 @@ class ADVI(Inference):
mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)}
Local Vars are used for Autoencoding Variational Bayes
See (AEVB; Kingma and Welling, 2014) for details
-
- model : PyMC3 model for inference
-
- cost_part_grad_scale : float or scalar tensor
+ model : :class:`Model`
+ PyMC3 model for inference
+ cost_part_grad_scale : `scalar`
Scaling the score part of the gradient can be useful near the optimum for
achieving better convergence properties. A common schedule is
1 at the start and 0 at the end, so a slow decay will be ok.
See (Sticking the Landing; Geoffrey Roeder,
Yuhuai Wu, David Duvenaud, 2016) for details
- scale_cost_to_minibatch : bool, default False
- Scale cost to minibatch instead of full dataset
+ scale_cost_to_minibatch : `bool`
+ Scale cost to minibatch instead of full dataset, default False
seed : None or int
leave None to use the package-global RandomStream, or any other
valid value to create an instance-specific one
- start : Point
+ start : `Point`
starting point for inference
References
----------
- - Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A.,
+ - Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A.,
and Blei, D. M. (2016). Automatic Differentiation Variational
Inference. arXiv preprint arXiv:1603.00788.
- - Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016
+ - Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016
Sticking the Landing: A Simple Reduced-Variance Gradient for ADVI
approximateinference.org/accepted/RoederEtAl2016.pdf
- - Kingma, D. P., & Welling, M. (2014).
- Auto-Encoding Variational Bayes. stat, 1050, 1.
+ - Kingma, D. P., & Welling, M. (2014).
+ Auto-Encoding Variational Bayes. stat, 1050, 1.
"""
328
334
def __init__ (self , local_rv = None , model = None ,
329
335
cost_part_grad_scale = 1 ,
@@ -343,12 +349,12 @@ def from_mean_field(cls, mean_field):
Parameters
----------
- mean_field : MeanField
+ mean_field : :class:`MeanField`
approximation to start with
Returns
-------
- ADVI
+ :class:`ADVI`
"""
353
359
if not isinstance (mean_field , MeanField ):
354
360
raise TypeError ('Expected MeanField, got %r' % mean_field )
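
For illustration (a sketch, not part of this changeset, assuming :class:`MeanField` can be instantiated with default arguments inside a model context), seeding ADVI from an existing mean-field approximation might look like::

    with model:
        mf = pm.MeanField()                      # pre-built approximation
        inference = pm.ADVI.from_mean_field(mf)  # warm-start ADVI from it
        inference.fit(n=5000)
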
@@ -369,10 +375,9 @@ class FullRankADVI(Inference):
mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)}
Local Vars are used for Autoencoding Variational Bayes
See (AEVB; Kingma and Welling, 2014) for details
-
- model : PyMC3 model for inference
-
- cost_part_grad_scale : float or scalar tensor
+ model : :class:`Model`
+ PyMC3 model for inference
+ cost_part_grad_scale : `scalar`
Scaling the score part of the gradient can be useful near the optimum for
achieving better convergence properties. A common schedule is
1 at the start and 0 at the end, so a slow decay will be ok.
@@ -383,21 +388,21 @@ class FullRankADVI(Inference):
seed : None or int
leave None to use the package-global RandomStream, or any other
valid value to create an instance-specific one
- start : Point
+ start : `Point`
starting point for inference
References
----------
- - Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A.,
+ - Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A.,
and Blei, D. M. (2016). Automatic Differentiation Variational
Inference. arXiv preprint arXiv:1603.00788.
- - Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016
+ - Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016
Sticking the Landing: A Simple Reduced-Variance Gradient for ADVI
approximateinference.org/accepted/RoederEtAl2016.pdf
- - Kingma, D. P., & Welling, M. (2014).
- Auto-Encoding Variational Bayes. stat, 1050, 1.
+ - Kingma, D. P., & Welling, M. (2014).
+ Auto-Encoding Variational Bayes. stat, 1050, 1.
"""
402
407
def __init__ (self , local_rv = None , model = None ,
403
408
cost_part_grad_scale = 1 ,
@@ -417,12 +422,12 @@ def from_full_rank(cls, full_rank):
Parameters
----------
- full_rank : FullRank
+ full_rank : :class:`FullRank`
approximation to start with
Returns
-------
- FullRankADVI
+ :class:`FullRankADVI`
"""
427
432
if not isinstance (full_rank , FullRank ):
428
433
raise TypeError ('Expected MeanField, got %r' % full_rank )
@@ -439,17 +444,17 @@ def from_mean_field(cls, mean_field, gpu_compat=False):
Parameters
----------
- mean_field : MeanField
+ mean_field : :class:`MeanField`
approximation to start with
- Flags
- -----
- gpu_compat : bool
+ Other Parameters
+ ----------------
+ gpu_compat : `bool`
whether to use the GPU-compatible version
Returns
-------
- FullRankADVI
+ :class:`FullRankADVI`
"""
454
459
full_rank = FullRank .from_mean_field (mean_field , gpu_compat )
455
460
inference = object .__new__ (cls )
@@ -465,16 +470,16 @@ def from_advi(cls, advi, gpu_compat=False):
Parameters
----------
- advi : ADVI
+ advi : :class:`ADVI`
- Flags
- -----
+ Other Parameters
+ ----------------
gpu_compat : bool
whether to use the GPU-compatible version
Returns
-------
- FullRankADVI
+ :class:`FullRankADVI`
"""
479
484
inference = cls .from_mean_field (advi .approx , gpu_compat )
480
485
inference .hist = advi .hist
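
For context, a sketch (not part of this changeset) of the intended upgrade path from a fitted ADVI run to a full-rank approximation::

    with model:
        advi = pm.ADVI()
        advi.fit(n=10000)
        # reuse the fitted mean-field result to seed the full-rank approximation
        inference = pm.FullRankADVI.from_advi(advi)
        inference.fit(n=10000)
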
@@ -494,35 +499,37 @@ class SVGD(Inference):
Input: A target distribution with density function :math:`p(x)`
and a set of initial particles :math:`{x^0_i}^n_{i=1}`
Output: A set of particles :math:`{x_i}^n_{i=1}` that approximates the target distribution.
+
.. math::

    x_i^{l+1} \leftarrow x_i^l + \epsilon_l \hat{\phi}^{*}(x_i^l)

    \hat{\phi}^{*}(x) = \frac{1}{n}\sum^{n}_{j=1}[k(x^l_j, x) \nabla_{x^l_j} \log p(x^l_j) + \nabla_{x^l_j} k(x^l_j, x)]
Parameters
----------
- n_particles : int
+ n_particles : `int`
number of particles to use for approximation
- jitter :
+ jitter : `float`
noise sd for initial point
- model : pm.Model
- kernel : callable
+ model : :class:`Model`
+ PyMC3 model for inference
+ kernel : `callable`
kernel function for KSD f(histogram) -> (k(x,.), \nabla_x k(x,.))
scale_cost_to_minibatch : bool, default False
Scale cost to minibatch instead of full dataset
- start : dict
+ start : `dict`
initial point for inference
- histogram : Empirical
+ histogram : :class:`Empirical`
initialize SVGD with given Empirical approximation instead of default initial particles
seed : None or int
leave None to use the package-global RandomStream, or any other
valid value to create an instance-specific one
- start : Point
+ start : `Point`
starting point for inference
References
----------
- - Qiang Liu, Dilin Wang (2016)
+ - Qiang Liu, Dilin Wang (2016)
Stein Variational Gradient Descent: A General Purpose Bayesian Inference Algorithm
arXiv:1608.04471
"""
@@ -546,26 +553,31 @@ def fit(n=10000, local_rv=None, method='advi', model=None, seed=None, start=None
Parameters
----------
- n : int
+ n : `int`
number of iterations
local_rv : dict[var->tuple]
mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)}
Local Vars are used for Autoencoding Variational Bayes
See (AEVB; Kingma and Welling, 2014) for details
- method : str or Inference
+ method : str or :class:`Inference`
string name is case insensitive in {'advi', 'fullrank_advi', 'advi->fullrank_advi'}
- model : Model
- kwargs : kwargs for Inference.fit
- frac : float
+ model : :class:`Model`
+ PyMC3 model for inference
+
+ Other Parameters
+ ----------------
+ frac : `float`
if method is 'advi->fullrank_advi', the fraction of training spent in the ADVI stage
seed : None or int
leave None to use the package-global RandomStream, or any other
valid value to create an instance-specific one
- start : Point
+ start : `Point`
starting point for inference
+ kwargs : kwargs for :meth:`Inference.fit`
+
Returns
-------
- Approximation
+ :class:`Approximation`
"""
570
582
if model is None :
571
583
model = pm .modelcontext (model )
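
As a usage illustration of :code:`fit` (a sketch with a made-up toy model, assuming the returned :class:`Approximation` exposes a :code:`sample` method)::

    import numpy as np
    import pymc3 as pm

    with pm.Model() as model:
        mu = pm.Normal('mu', mu=0., sd=1.)
        pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(100))
        # mean-field ADVI for the first half of training, full-rank for the rest
        approx = pm.fit(n=20000, method='advi->fullrank_advi', frac=0.5)

    trace = approx.sample(500)
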