Commit 2dfd78c

refine docs, add dev-docs
1 parent f0f2e58 commit 2dfd78c

File tree: 5 files changed (+333, -100 lines)


pymc3/variational/approximations.py (8 additions, 8 deletions)

```diff
@@ -414,7 +414,7 @@ def __init_group__(self, group):
         )
         if not isinstance(formula, flows.Formula):
             formula = flows.Formula(formula)
-        if self.islocal:
+        if self.local:
             bs = -1
         elif self.batched:
             bs = self.bdim
@@ -500,7 +500,7 @@ def symbolic_random(self):

     @node_property
     def bdim(self):
-        if not self.islocal:
+        if not self.local:
             return super(NormalizingFlowGroup, self).bdim
         else:
             return next(iter(self.user_params[0].values())).shape[0]
@@ -533,11 +533,11 @@ def sample_approx(approx, draws=100, include_transformed=True):
 # single group shortcuts exported to user
 class SingleGroupApproximation(Approximation):
     """Base class for Single Group Approximation"""
-    group_class = None
+    _group_class = None

     def __init__(self, *args, **kwargs):
         local_rv = kwargs.get('local_rv')
-        groups = [self.group_class(None, *args, **kwargs)]
+        groups = [self._group_class(None, *args, **kwargs)]
         if local_rv is not None:
             groups.extend([Group([v], params=p, local=True, model=kwargs.get('model'))
                            for v, p in local_rv.items()])
@@ -549,17 +549,17 @@ def __getattr__(self, item):

 class MeanField(SingleGroupApproximation):
     """Single Group Mean Field Approximation"""
-    group_class = MeanFieldGroup
+    _group_class = MeanFieldGroup


 class FullRank(SingleGroupApproximation):
     """Single Group Full Rank Approximation"""
-    group_class = FullRankGroup
+    _group_class = FullRankGroup


 class Empirical(SingleGroupApproximation):
     """Single Group Full Rank Approximation"""
-    group_class = EmpiricalGroup
+    _group_class = EmpiricalGroup

     def __init__(self, trace=None, size=None, **kwargs):
         if kwargs.get('local_rv', None) is not None:
@@ -569,7 +569,7 @@ def __init__(self, trace=None, size=None, **kwargs):

 class NormalizingFlow(SingleGroupApproximation):
     """Single Group Normalizing Flow Approximation"""
-    group_class = NormalizingFlowGroup
+    _group_class = NormalizingFlowGroup

     def __init__(self, flow=NormalizingFlowGroup.default_flow, *args, **kwargs):
         kwargs['flow'] = flow
```
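For orientation (not part of the commit): each single-group shortcut differs only in which group implementation it instantiates, and after this rename that choice goes through the private `_group_class` attribute. A minimal sketch of the pattern, assuming the names shown in this file are importable as written:

```python
# Hedged sketch of the renamed hook: a single-group shortcut picks its group
# implementation via `_group_class` (previously `group_class`).
from pymc3.variational.approximations import MeanFieldGroup, SingleGroupApproximation


class MyMeanField(SingleGroupApproximation):
    """Hypothetical subclass, shown only to illustrate the hook."""
    _group_class = MeanFieldGroup
```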

pymc3/variational/flows.py (3 additions, 3 deletions)

```diff
@@ -129,7 +129,7 @@ def flow_for_short_name(cls, name):
         return cls.__name_registry[name.lower()]

     def __init__(self, z0=None, dim=None, jitter=.001, batch_size=None, local=False):
-        self.islocal = local
+        self.local = local
         self.batch_size = batch_size
         self.__jitter = jitter
         if isinstance(z0, AbstractFlow):
@@ -155,7 +155,7 @@ def add_param(self, user=None, name=None, ref=0., dtype='floatX'):
         spec = self.__param_spec__[name]
         shape = tuple(eval(s, {'d': self.dim}) for s in spec)
         if user is None:
-            if self.islocal:
+            if self.local:
                 raise opvi.LocalGroupError('Need parameters for local group flow')
             if self.batched:
                 if self.batch_size is None:
@@ -168,7 +168,7 @@ def add_param(self, user=None, name=None, ref=0., dtype='floatX'):

         else:
             if self.batched:
-                if self.islocal or self.batch_size is None:
+                if self.local or self.batch_size is None:
                     shape = (-1,) + shape
                 else:
                     shape = (self.batch_size,) + shape
```
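For context (not part of the commit): `add_param` above derives parameter shapes from the `local`/`batched` flags, but end users normally reach flows through a formula string. A minimal sketch, assuming `flows.Formula` accepts the short names held in the registry that `flow_for_short_name` reads (the `scale-loc` name appears in this commit's `fit` docs):

```python
# Hedged sketch: building the default NFVI flow formula by short name.
from pymc3.variational import flows

formula = flows.Formula('scale-loc')  # an elementwise scale flow followed by a loc flow
```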

pymc3/variational/inference.py (76 additions, 23 deletions)

```diff
@@ -32,8 +32,7 @@


 class Inference(object):
-    R"""
-    Base class for Variational Inference
+    R"""**Base class for Variational Inference**

     Communicates Operator, Approximation and Test Function to build Objective Function
@@ -87,8 +86,7 @@ def run_profiling(self, n=1000, score=None, **kwargs):

     def fit(self, n=10000, score=None, callbacks=None, progressbar=True,
             **kwargs):
-        """
-        Performs Operator Variational Inference
+        """Perform Operator Variational Inference

         Parameters
         ----------
@@ -100,8 +98,29 @@ def fit(self, n=10000, score=None, callbacks=None, progressbar=True,
             calls provided functions after each iteration step
         progressbar : bool
            whether to show progressbar or not
-        kwargs : kwargs
-            additional kwargs for :func:`ObjectiveFunction.step_function`
+
+        Other Parameters
+        ----------------
+        obj_n_mc : `int`
+            Number of monte carlo samples used for approximation of objective gradients
+        tf_n_mc : `int`
+            Number of monte carlo samples used for approximation of test function gradients
+        obj_optimizer : function (grads, params) -> updates
+            Optimizer that is used for objective params
+        test_optimizer : function (grads, params) -> updates
+            Optimizer that is used for test function params
+        more_obj_params : `list`
+            Add custom params for objective optimizer
+        more_tf_params : `list`
+            Add custom params for test function optimizer
+        more_updates : `dict`
+            Add custom updates to resulting updates
+        total_grad_norm_constraint : `float`
+            Bounds gradient norm, prevents exploding gradient problem
+        fn_kwargs : `dict`
+            Add kwargs to theano.function (e.g. `{'profile': True}`)
+        more_replacements : `dict`
+            Apply custom replacements before calculating gradients

         Returns
         -------
```
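For illustration (not part of the commit): the new ``Other Parameters`` section documents keyword arguments that ``fit`` forwards to the objective's step function. A minimal sketch of passing them through, assuming the public PyMC3 API of this era (`pm.ADVI` and top-level optimizer constructors such as `pm.adagrad` are assumptions, not taken from the diff):

```python
# Hedged sketch: threading the "Other Parameters" documented above through fit().
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0., sd=10.)
    pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(100))

    inference = pm.ADVI()
    approx = inference.fit(
        n=10000,
        obj_n_mc=10,                                   # MC samples for objective gradients
        obj_optimizer=pm.adagrad(learning_rate=0.2),   # callable (grads, params) -> updates
        total_grad_norm_constraint=10.,                # guard against exploding gradients
    )
    trace = approx.sample(500)
```

The state compiled by this call is also what ``refine`` (in the next hunk) reuses to continue optimization without rebuilding the step function.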
```diff
@@ -198,7 +217,8 @@ def _infmean(input_array):
                              score=True)

     def refine(self, n, progressbar=True):
-        """Refine the solution using the last compiled step function"""
+        """Refine the solution using the last compiled step function
+        """
         if self.state is None:
             raise TypeError('Need to call `.fit` first')
         i, step, callbacks, score = self.state
@@ -211,19 +231,22 @@ def refine(self, n, progressbar=True):


 class KLqp(Inference):
-    """General approach to fit Approximations that define :math:`logq`
-    by maximizing ELBO (Evidence Lower BOund).
+    """**Kullback Leibler Divergence Inference**
+
+    General approach to fit Approximations that define :math:`logq`
+    by maximizing ELBO (Evidence Lower Bound).

     Parameters
     ----------
     approx : :class:`Approximation`
+        Approximation to fit, it is required to have `logQ`
     """
     def __init__(self, approx):
         super(KLqp, self).__init__(KL, approx, None)


 class ADVI(KLqp):
-    R"""Automatic Differentiation Variational Inference (ADVI)
+    R"""**Automatic Differentiation Variational Inference (ADVI)**

     This class implements the meanfield ADVI, where the variational
     posterior distribution is assumed to be spherical Gaussian without
@@ -341,7 +364,7 @@ class ADVI(KLqp):
     Parameters
     ----------
     local_rv : dict[var->tuple]
-        mapping {model_variable -> local_variable (:math:`\mu`, :math:`\rho`)}
+        mapping {model_variable -> approx params}
         Local Vars are used for Autoencoding Variational Bayes
         See (AEVB; Kingma and Welling, 2014) for details
     model : :class:`pymc3.Model`
```
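For context (not part of the diff): the ``local_rv`` mapping now carries whatever parameters the local group expects; for the mean-field groups used by ADVI these are a `mu`/`rho` pair, where `rho` is an unconstrained scale. A sketch of the assumed parameterization (the softplus link from `rho` to a standard deviation is how PyMC3 handles it elsewhere; treat it as an assumption here):

```latex
% Mean-field ADVI family (assumed); sigma_i is derived from the unconstrained rho_i.
q(\theta) = \prod_i \mathcal{N}\!\left(\theta_i \mid \mu_i,\ \sigma_i^2\right),
\qquad \sigma_i = \log\!\left(1 + e^{\rho_i}\right)
```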
```diff
@@ -371,12 +394,12 @@ def __init__(self, *args, **kwargs):


 class FullRankADVI(KLqp):
-    R"""Full Rank Automatic Differentiation Variational Inference (ADVI)
+    R"""**Full Rank Automatic Differentiation Variational Inference (ADVI)**

     Parameters
     ----------
     local_rv : dict[var->tuple]
-        mapping {model_variable -> local_variable (:math:`\mu`, :math:`\rho`)}
+        mapping {model_variable -> approx params}
         Local Vars are used for Autoencoding Variational Bayes
         See (AEVB; Kingma and Welling, 2014) for details
     model : :class:`pymc3.Model`
@@ -406,7 +429,9 @@ def __init__(self, *args, **kwargs):


 class ImplicitGradient(Inference):
-    """Implicit Gradient for Variational Inference
+    """**Implicit Gradient for Variational Inference**
+
+    **not suggested to use**

     An approach to fit arbitrary approximation by computing kernel based gradient
     By default RBF kernel is used for gradient estimation. Default estimator is
@@ -424,7 +449,7 @@ def __init__(self, approx, estimator=KSD, kernel=test_functions.rbf, **kwargs):


 class SVGD(ImplicitGradient):
-    R"""Stein Variational Gradient Descent
+    R"""**Stein Variational Gradient Descent**

     This inference is based on Kernelized Stein Discrepancy
     it's main idea is to move initial noisy particles so that
@@ -433,9 +458,9 @@ class SVGD(ImplicitGradient):
     Algorithm is outlined below

     *Input:* A target distribution with density function :math:`p(x)`
-    and a set of initial particles :math:`{x^0_i}^n_{i=1}`
+    and a set of initial particles :math:`\{x^0_i\}^n_{i=1}`

-    *Output:* A set of particles :math:`{x_i}^n_{i=1}` that approximates the target distribution.
+    *Output:* A set of particles :math:`\{x^{*}_i\}^n_{i=1}` that approximates the target distribution.

     .. math::
```
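For illustration (not part of the commit): SVGD iteratively transports a cloud of particles toward the target density, so the fitted object behaves like an empirical approximation that can be sampled like a trace. A minimal sketch on a toy model, assuming the public API of this period:

```python
# Hedged sketch: SVGD usage with the constructor parameters shown in the diff
# (n_particles, jitter); the result is sampled like any other approximation.
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0., sd=1.)
    pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(50))

    approx = pm.SVGD(n_particles=200, jitter=1).fit(n=3000)
    trace = approx.sample(500)
```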
```diff
@@ -490,7 +515,9 @@ def __init__(self, n_particles=100, jitter=1, model=None, start=None,


 class ASVGD(ImplicitGradient):
-    R"""Amortized Stein Variational Gradient Descent
+    R"""**Amortized Stein Variational Gradient Descent**
+
+    **not suggested to use**

     This inference is based on Kernelized Stein Discrepancy
     it's main idea is to move initial noisy particles so that
@@ -585,7 +612,7 @@ def run_profiling(self, n=1000, score=None, obj_n_mc=500, **kwargs):


 class NFVI(KLqp):
-    R"""Normalizing Flow based :class:`KLqp` inference
+    R"""**Normalizing Flow based :class:`KLqp` inference**

     Normalizing flow is a series of invertible transformations on initial distribution.
@@ -648,15 +675,14 @@ def fit(n=10000, local_rv=None, method='advi', model=None,
     n : `int`
         number of iterations
     local_rv : dict[var->tuple]
-        mapping {model_variable -> local_variable (:math:`\mu`, :math:`\rho`)}
+        mapping {model_variable -> approx params}
         Local Vars are used for Autoencoding Variational Bayes
         See (AEVB; Kingma and Welling, 2014) for details
     method : str or :class:`Inference`
         string name is case insensitive in:

         - 'advi' for ADVI
         - 'fullrank_advi' for FullRankADVI
-        - 'advi->fullrank_advi' for fitting ADVI first and then FullRankADVI
         - 'svgd' for Stein Variational Gradient Descent
         - 'asvgd' for Amortized Stein Variational Gradient Descent
         - 'nfvi' for Normalizing Flow with default `scale-loc` flow
@@ -671,8 +697,35 @@ def fit(n=10000, local_rv=None, method='advi', model=None,
         additional kwargs passed to :class:`Inference`
     start : `Point`
         starting point for inference
-    kwargs : kwargs
-        additional kwargs for :func:`Inference.fit`
+
+    Other Parameters
+    ----------------
+    score : bool
+        evaluate loss on each iteration or not
+    callbacks : list[function : (Approximation, losses, i) -> None]
+        calls provided functions after each iteration step
+    progressbar : bool
+        whether to show progressbar or not
+    obj_n_mc : `int`
+        Number of monte carlo samples used for approximation of objective gradients
+    tf_n_mc : `int`
+        Number of monte carlo samples used for approximation of test function gradients
+    obj_optimizer : function (grads, params) -> updates
+        Optimizer that is used for objective params
+    test_optimizer : function (grads, params) -> updates
+        Optimizer that is used for test function params
+    more_obj_params : `list`
+        Add custom params for objective optimizer
+    more_tf_params : `list`
+        Add custom params for test function optimizer
+    more_updates : `dict`
+        Add custom updates to resulting updates
+    total_grad_norm_constraint : `float`
+        Bounds gradient norm, prevents exploding gradient problem
+    fn_kwargs : `dict`
+        Add kwargs to theano.function (e.g. `{'profile': True}`)
+    more_replacements : `dict`
+        Apply custom replacements before calculating gradients

     Returns
     -------
```
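For illustration (not part of the commit): the module-level ``fit`` wraps the inference classes above behind a single call; ``method`` takes the case-insensitive names listed in the docstring, and the ``Other Parameters`` are forwarded to ``Inference.fit``. A minimal sketch:

```python
# Hedged sketch: one-call interface with method strings from the docstring above.
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0., sd=1.)
    pm.Normal('obs', mu=mu, sd=1., observed=np.random.randn(50))

    mean_field = pm.fit(n=10000, method='advi')
    full_rank = pm.fit(n=10000, method='fullrank_advi', obj_n_mc=10)  # kwargs go to Inference.fit
```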

pymc3/variational/operators.py (7 additions, 8 deletions)

```diff
@@ -12,21 +12,21 @@


 class KL(Operator):
-    """
-    Operator based on Kullback Leibler Divergence
+    R"""**Operator based on Kullback Leibler Divergence**

     .. math::

-        KL[q(v)||p(v)] = \int q(v)\log\\frac{q(v)}{p(v)}dv
+        KL[q(v)||p(v)] = \int q(v)\log\frac{q(v)}{p(v)}dv
     """
+
     def apply(self, f):
         return self.logq_norm - self.logp_norm

 # SVGD Implementation


 class KSDObjective(ObjectiveFunction):
-    """Helper class for construction loss and updates for variational inference
+    R"""Helper class for construction loss and updates for variational inference

     Parameters
     ----------
@@ -59,8 +59,7 @@ def __call__(self, nmc, **kwargs):


 class KSD(Operator):
-    R"""
-    Operator based on Kernelized Stein Discrepancy
+    R"""**Operator based on Kernelized Stein Discrepancy**

     Input: A target distribution with density function :math:`p(x)`
     and a set of initial particles :math:`\{x^0_i\}^n_{i=1}`
@@ -74,8 +73,8 @@ class KSD(Operator):

     Parameters
     ----------
-    approx : :class:`Empirical`
-        Empirical Approximation used for inference
+    approx : :class:`Approximation`
+        Approximation used for inference

     References
     ----------
```
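For context (not part of the diff): ``KL.apply`` returns ``logq_norm - logp_norm``, i.e. a Monte Carlo estimate of the expected log-density ratio under ``q``. By the standard ELBO decomposition this differs from the documented divergence only by the constant log evidence, so minimizing the operator value maximizes the ELBO:

```latex
% Standard decomposition (background, not from the diff): the operator value is
% E_q[log q(v) - log p(v, D)], the negative ELBO, up to the constant log p(D).
KL\left[q(v) \,\|\, p(v \mid \mathcal{D})\right]
  = \mathbb{E}_{q}\left[\log q(v) - \log p(v, \mathcal{D})\right] + \log p(\mathcal{D})
```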
