diff --git a/docs/source/api/smc.rst b/docs/source/api/smc.rst index 55f228da23..0627d69c8e 100644 --- a/docs/source/api/smc.rst +++ b/docs/source/api/smc.rst @@ -8,7 +8,8 @@ Sequential Monte Carlo sample_smc -(smc_kernels)= +.. _smc_kernels: + SMC kernels ----------- diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 04131a0ddc..a19507cacd 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -41,7 +41,7 @@ class SMC_KERNEL(ABC): - """Base class for the Sequential Monte Carlo kernels + """Base class for the Sequential Monte Carlo kernels. To create a new SMC kernel you should subclass from this. @@ -53,73 +53,73 @@ class SMC_KERNEL(ABC): to sampling from the prior distribution. This method is only called if `start` is not specified. - _initialize_kernel: default + _initialize_kernel : default Creates initial population of particles in the variable `self.tempered_posterior` and populates the `self.var_info` dictionary with information about model variables shape and size as - {var.name : (var.shape, var.size) + {var.name : (var.shape, var.size)}. - The functions self.prior_logp_func and self.likelihood_logp_func are + The functions `self.prior_logp_func` and `self.likelihood_logp_func` are created in this step. These expect a 1D numpy array with the summed sizes of each raveled model variable (in the order specified in - model.inial_point). + :meth:`pymc.Model.initial_point`). Finally, this method computes the log prior and log likelihood for - the initial particles, and saves them in self.prior_logp and - self.likelihood_logp. + the initial particles, and saves them in `self.prior_logp` and + `self.likelihood_logp`. This method should not be modified. - setup_kernel: optional + setup_kernel : optional May include any logic that should be performed before sampling starts. 
During each sampling stage the following methods are called in order: - update_beta_and_weights: default - The inverse temperature self.beta is updated based on the self.likelihood_logp - and `threshold` parameter + update_beta_and_weights : default + The inverse temperature self.beta is updated based on the `self.likelihood_logp` + and `threshold` parameter. - The importance self.weights of each particle are computed from the old and newly - selected inverse temperature + The importance `self.weights` of each particle are computed from the old and newly + selected inverse temperature. The iteration number stored in `self.iteration` is updated by this method. - Finally the model log_marginal_likelihood of the tempered posterior - is updated from these weights + Finally the model `log_marginal_likelihood` of the tempered posterior + is updated from these weights. - resample: default - The particles in self.posterior are sampled with replacement based - on self.weights, and the used resampling indexes are saved in + resample : default + The particles in `self.posterior` are sampled with replacement based + on `self.weights`, and the used resampling indexes are saved in `self.resampling_indexes`. - The arrays self.prior_logp, self.likelihood_logp are rearranged according - to the order of the resampled particles. self.tempered_posterior_logp - is computed from these and the current self.beta + The arrays `self.prior_logp` and `self.likelihood_logp` are rearranged according + to the order of the resampled particles. `self.tempered_posterior_logp` + is computed from these and the current `self.beta`. - tune: optional - May include logic that should be performed before every mutation step + tune : optional + May include logic that should be performed before every mutation step. - mutate: REQUIRED - Mutate particles in self.tempered_posterior + mutate : REQUIRED + Mutate particles in `self.tempered_posterior`. 
- This method is further responsible to update the self.prior_logp, - self.likelihod_logp and self.tempered_posterior_logp, corresponding - to each mutated particle + This method is further responsible to update the `self.prior_logp`, + `self.likelihood_logp` and `self.tempered_posterior_logp`, corresponding + to each mutated particle. - sample_stats : default + sample_stats : default Returns important sampling_stats at the end of each stage in a dictionary - format. This will be saved in the final InferenceData objcet under `sample_stats`. + format. This will be saved in the final InferenceData object under `sample_stats`. Finally, at the end of sampling the following methods are called: - _posterior_to_trace : default + _posterior_to_trace : default Convert final population of particles to a posterior trace object. This method should not be modified. - sample_settings : default: + sample_settings : default Returns important sample_settings at the end of sampling in a dictionary - format. This will be saved in the final InferenceData objcet under `sample_stats`. + format. This will be saved in the final InferenceData object under `sample_stats`. """ @@ -132,23 +132,29 @@ def __init__( threshold=0.5, ): """ + Initialize the SMC_kernel class. Parameters ---------- - draws: int - The number of samples to draw from the posterior (i.e. last stage). And also the number of + draws : int, default 2000 + The number of samples to draw from the posterior (i.e. last stage). Also the number of independent chains. Defaults to 2000. - start: dict, or array of dict + start : dict, or array of dict, default None Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. - model: Model (optional if in ``with`` context)). - random_seed: int + model : Model (optional if in ``with`` context). 
+ random_seed : int, array_like of int, RandomState or Generator, optional Value used to initialize the random number generator. - threshold: float + threshold : float, default 0.5 Determines the change of beta from stage to stage, i.e.indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. + Attributes + ---------- + self.var_info : dict + Dictionary that contains information about model variables shape and size. + """ self.draws = draws @@ -199,7 +205,7 @@ def initialize_population(self) -> Dict[str, np.ndarray]: return cast(Dict[str, np.ndarray], dict_prior) def _initialize_kernel(self): - """Create variables and logp function necessary to run kernel + """Create variables and logp function necessary to run SMC kernel This method should not be overwritten. If needed, use `setup_kernel` instead. @@ -301,7 +307,7 @@ def mutate(self): def sample_stats(self) -> Dict: """Stats to be saved at the end of each stage - These stats will be saved under `sample_stats` in the final InferenceData. + These stats will be saved under `sample_stats` in the final InferenceData object. """ return { "log_marginal_likelihood": self.log_marginal_likelihood if self.beta == 1 else np.nan, @@ -309,9 +315,9 @@ def sample_stats(self) -> Dict: } def sample_settings(self) -> Dict: - """Kernel settings to be saved once at the end of sampling + """SMC_kernel settings to be saved once at the end of sampling. - These stats will be saved under `sample_stats` in the final InferenceData. + These stats will be saved under `sample_stats` in the final InferenceData object. 
""" return { @@ -347,15 +353,19 @@ def _posterior_to_trace(self, chain=0) -> NDArray: class IMH(SMC_KERNEL): - """Independent Metropolis-Hastings SMC kernel""" + """Independent Metropolis-Hastings SMC_kernel""" def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ Parameters ---------- - correlation_threshold: float - The lower the value the higher the number of IMH steps computed automatically. + correlation_threshold : float, default 0.01 + The lower the value, the higher the number of IMH steps computed automatically. Defaults to 0.01. It should be between 0 and 1. + **kwargs : dict, optional + Keyword arguments passed to the SMC_kernel. Refer to SMC_kernel documentation for a + list of all possible arguments. + """ super().__init__(*args, **kwargs) self.correlation_threshold = correlation_threshold @@ -449,15 +459,19 @@ def get(self, b): class MH(SMC_KERNEL): - """Metropolis-Hastings SMC kernel""" + """Metropolis-Hastings SMC_kernel""" def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ Parameters ---------- - correlation_threshold: float - The lower the value the higher the number of MH steps computed automatically. + correlation_threshold : float, default 0.01 + The lower the value, the higher the number of MH steps computed automatically. Defaults to 0.01. It should be between 0 and 1. + **kwargs : dict, optional + Keyword arguments passed to the SMC_kernel. Refer to SMC_kernel documentation for a + list of all possible arguments. + """ super().__init__(*args, **kwargs) self.correlation_threshold = correlation_threshold @@ -468,7 +482,7 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): def setup_kernel(self): """Proposal dist is just a Multivariate Normal with unit identity covariance. 
- Dimension specific scaling is provided by self.proposal_scales and set in self.tune() + Dimension specific scaling is provided by `self.proposal_scales` and set in `self.tune()` """ ndim = self.tempered_posterior.shape[1] self.proposal_scales = np.full(self.draws, min(1, 2.38**2 / ndim)) @@ -586,11 +600,11 @@ def _logp_forw(point, out_vars, in_vars, shared): Parameters ---------- out_vars : list - containing :class:`pymc.Distribution` for the output variables + Containing Distribution for the output variables in_vars : list - containing :class:`pymc.Distribution` for the input variables + Containing Distribution for the input variables shared : list - containing :class:`aesara.tensor.Tensor` for depended shared data + Containing TensorVariable for depended shared data """ # Replace integer inputs with rounded float inputs diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 19f783610d..0cd5c39cc0 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -56,52 +56,54 @@ def sample_smc( Parameters ---------- - draws: int + draws : int, default 2000 The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 2000. - kernel: SMC Kernel used. Defaults to pm.smc.IMH (Independent Metropolis Hastings) - start: dict, or array of dict + kernel : SMC_kernel, optional + SMC kernel used. Defaults to :class:`pymc.smc.smc.IMH` (Independent Metropolis Hastings) + start : dict or array of dict, optional Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. - model: Model (optional if in ``with`` context)). - random_seed : int, array-like of int, RandomState or Generator, optional + model : Model (optional if in ``with`` context). + random_seed : int, array_like of int, RandomState or Generator, optional Random seed(s) used by the sampling steps. 
If a list, tuple or array of ints is passed, each entry will be used to seed each chain. A ValueError will be raised if the length does not match the number of chains. - chains : int + chains : int, optional The number of chains to sample. Running independent chains is important for some convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever is larger. - cores : int + cores : int, default None The number of chains to run in parallel. If ``None``, set to the number of CPUs in the system. - compute_convergence_checks : bool + compute_convergence_checks : bool, default True Whether to compute sampler statistics like ``R hat`` and ``effective_n``. Defaults to ``True``. - return_inferencedata : bool, default=True - Whether to return the trace as an :class:`arviz:arviz.InferenceData` (True) object or a `MultiTrace` (False) + return_inferencedata : bool, default True + Whether to return the trace as an InferenceData (True) object or a MultiTrace (False). Defaults to ``True``. idata_kwargs : dict, optional - Keyword arguments for :func:`pymc.to_inference_data` - progressbar : bool, optional default=True + Keyword arguments for :func:`pymc.to_inference_data`. + progressbar : bool, optional, default True Whether or not to display a progress bar in the command line. - **kernel_kwargs: keyword arguments passed to the SMC kernel. - The default IMH kernel takes the following keywords: - threshold: float - Determines the change of beta from stage to stage, i.e. indirectly the number of stages, - the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. - It should be between 0 and 1. - correlation_threshold: float + **kernel_kwargs : dict, optional + Keyword arguments passed to the SMC_kernel. The default IMH kernel takes the following keywords: + + threshold : float, default 0.5 + Determines the change of beta from stage to stage, i.e. 
indirectly the number of stages, + the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. + It should be between 0 and 1. + correlation_threshold : float, default 0.01 The lower the value the higher the number of MCMC steps computed automatically. Defaults to 0.01. It should be between 0 and 1. - Keyword arguments for other kernels should be checked in the respective docstrings + Keyword arguments for other kernels should be checked in the respective docstrings. Notes ----- SMC works by moving through successive stages. At each stage the inverse temperature :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0 - we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution. - So in more general terms we are always computing samples from a tempered posterior that we can + we have the prior distribution and when :math:`\beta = 1` we have the posterior distribution. + So in more general terms, we are always computing samples from a tempered posterior that we can write as: .. math:: @@ -113,12 +115,12 @@ def sample_smc( 1. Initialize :math:`\beta` at zero and stage at zero. 2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the tempered posterior is the prior). - 3. Increase :math:`\beta` in order to make the effective sample size equals some predefined + 3. Increase :math:`\beta` in order to make the effective sample size equal some predefined value (we use :math:`Nt`, where :math:`t` is 0.5 by default). 4. Compute a set of N importance weights W. The weights are computed as the ratio of the likelihoods of a sample at stage i+1 and stage i. 5. Obtain :math:`S_{w}` by re-sampling according to W. - 6. Use W to compute the mean and covariance for the proposal distribution, a MVNormal. + 6. Use W to compute the mean and covariance for the proposal distribution, a MvNormal. 7. 
Run N independent MCMC chains, starting each one from a different sample in :math:`S_{w}`. For the IMH kernel, the mean of the proposal distribution is the mean of the previous posterior stage and not the current point in parameter space. @@ -130,15 +132,15 @@ def sample_smc( References ---------- - .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013), - Bayesian inversion for finite fault earthquake source models I- Theory and algorithm. - Geophysical Journal International, 2013, 194(3), pp.1701-1726, + .. [Minson2013] Minson, S. E., Simons, M., and Beck, J. L. (2013). + "Bayesian inversion for finite fault earthquake source models I- Theory and algorithm." + Geophysical Journal International, 2013, 194(3), pp.1701-1726. `link `__ - .. [Ching2007] Ching, J. and Chen, Y. (2007). - Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class - Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816), - 816-832. `link `__ """ diff --git a/pymc/variational/inference.py b/pymc/variational/inference.py index b2f8a4a0ba..94bc223b34 100644 --- a/pymc/variational/inference.py +++ b/pymc/variational/inference.py @@ -49,12 +49,12 @@ class Inference: Parameters ---------- - op: Operator class - approx: Approximation class or instance - tf: TestFunction instance - model: Model + op : Operator class #:class:`~pymc.variational.operators` + approx : Approximation class or instance #:class:`~pymc.variational.approximations` + tf : TestFunction instance #? 
+ model : Model PyMC Model - kwargs: kwargs passed to :class:`Operator` + kwargs : kwargs passed to :class:`Operator`, optional """ def __init__(self, op, approx, tf, **kwargs): @@ -96,18 +96,18 @@ def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs): Parameters ---------- - n: int + n : int number of iterations - score: bool + score : bool evaluate loss on each iteration or not - callbacks: list[function: (Approximation, losses, i) -> None] + callbacks : list[function: (Approximation, losses, i) -> None] calls provided functions after each iteration step - progressbar: bool + progressbar : bool whether to show progressbar or not Other Parameters ---------------- - obj_n_mc: `int` + obj_n_mc : int Number of monte carlo samples used for approximation of objective gradients tf_n_mc: `int` Number of monte carlo samples used for approximation of test function gradients