diff --git a/docs/source/api/samplers.rst b/docs/source/api/samplers.rst
index 1cd43b1da3..0acbc9df28 100644
--- a/docs/source/api/samplers.rst
+++ b/docs/source/api/samplers.rst
@@ -52,17 +52,6 @@ Metropolis family
    PoissonProposal
    UniformProposal
 
-MLDA family
------------
-
-.. autosummary::
-   :toctree: generated/
-
-   MLDA
-   DEMetropolisZMLDA
-   MetropolisMLDA
-   RecursiveDAProposal
-
 Other step methods
 ------------------
 
diff --git a/pymc/step_methods/__init__.py b/pymc/step_methods/__init__.py
index 8eb79a44b5..d7beea1116 100644
--- a/pymc/step_methods/__init__.py
+++ b/pymc/step_methods/__init__.py
@@ -28,12 +28,6 @@
     PoissonProposal,
     UniformProposal,
 )
-from pymc.step_methods.mlda import (
-    MLDA,
-    DEMetropolisZMLDA,
-    MetropolisMLDA,
-    RecursiveDAProposal,
-)
 from pymc.step_methods.slicer import Slice
 
 STEP_METHODS = (
diff --git a/pymc/step_methods/mlda.py b/pymc/step_methods/mlda.py
deleted file mode 100644
index 5b6f350730..0000000000
--- a/pymc/step_methods/mlda.py
+++ /dev/null
@@ -1,1099 +0,0 @@
-# Copyright 2020 The PyMC Developers
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import warnings
-
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
-
-import arviz as az
-import numpy as np
-
-from aesara.tensor.sharedvar import TensorSharedVariable
-
-import pymc as pm
-
-from pymc.blocking import DictToArrayBijection, RaveledVars
-from pymc.model import Model, Point
-from pymc.step_methods.arraystep import ArrayStepShared, Competence, metrop_select
-from pymc.step_methods.compound import CompoundStep
-from pymc.step_methods.metropolis import DEMetropolisZ, Metropolis, Proposal, delta_logp
-
-__all__ = [
-    "MetropolisMLDA",
-    "DEMetropolisZMLDA",
-    "RecursiveDAProposal",
-    "MLDA",
-    "extract_Q_estimate",
-]
-
-
-class MetropolisMLDA(Metropolis):
-    """
-    Metropolis-Hastings sampling step tailored for use as base sampler in MLDA.
-    """
-
-    name = "metropolis_mlda"
-
-    def __init__(self, *args, **kwargs):
-        """
-        Initialise MetropolisMLDA. This is a mix of the parent class's
-        initialisation and some extra code specific to MLDA.
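-
-        Note: when used as the MLDA base sampler with variance reduction,
-        the parent MLDA stepper passes the extra kwargs
-        ``mlda_variance_reduction`` and ``mlda_subsampling_rate_above``,
-        which activate the register of quantity-of-interest (Q) values
-        maintained in the code below.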
-        """
-        model = pm.modelcontext(kwargs.get("model", None))
-        initial_values = model.initial_point()
-
-        # flag indicating that variance reduction is activated - forces MetropolisMLDA
-        # to store quantities of interest in a register if True
-        self.mlda_variance_reduction = kwargs.pop("mlda_variance_reduction", False)
-        if self.mlda_variance_reduction:
-            # Subsampling rate of MLDA sampler one level up
-            self.mlda_subsampling_rate_above = kwargs.pop("mlda_subsampling_rate_above")
-            self.sub_counter = 0
-            self.Q_last = np.nan
-            self.Q_reg = [np.nan] * self.mlda_subsampling_rate_above
-
-        # call parent class __init__
-        super().__init__(*args, **kwargs)
-
-        # modify the delta function and point to model if VR is used
-        if self.mlda_variance_reduction:
-            self.model = model
-            self.delta_logp_factory = self.delta_logp
-            self.delta_logp = lambda q, q0: -self.delta_logp_factory(q0, q)
-
-    def reset_tuning(self):
-        """
-        Does not reset sampler parameters. Allows continuation with
-        the same settings when MetropolisMLDA steps are done in chunks
-        under MLDA.
-        """
-        return
-
-    def astep(self, q0):
-
-        q_new, stats = super().astep(q0)
-
-        # Add variance reduction functionality.
-        if self.mlda_variance_reduction:
-            if stats[0]["accepted"]:
-                self.Q_last = self.model.Q.get_value()
-            if self.sub_counter == self.mlda_subsampling_rate_above:
-                self.sub_counter = 0
-            self.Q_reg[self.sub_counter] = self.Q_last
-            self.sub_counter += 1
-
-        return q_new, stats
-
-
-class DEMetropolisZMLDA(DEMetropolisZ):
-    """
-    DEMetropolisZ sampling step tailored for use as base sampler in MLDA
-    """
-
-    name = "DEMetropolisZ_mlda"
-
-    def __init__(self, *args, **kwargs):
-        """
-        Initialise DEMetropolisZMLDA. Uses the parent class __init__
-        plus extra code specific for use within MLDA.
-        """
-
-        # flag used for signaling the end of tuning
-        self.tuning_end_trigger = False
-
-        model = pm.modelcontext(kwargs.get("model", None))
-        initial_values = model.initial_point()
-
-        # flag indicating that variance reduction is activated - forces DEMetropolisZMLDA
-        # to store quantities of interest in a register if True
-        self.mlda_variance_reduction = kwargs.pop("mlda_variance_reduction", False)
-        if self.mlda_variance_reduction:
-            # Subsampling rate of MLDA sampler one level up
-            self.mlda_subsampling_rate_above = kwargs.pop("mlda_subsampling_rate_above")
-            self.sub_counter = 0
-            self.Q_last = np.nan
-            self.Q_reg = [np.nan] * self.mlda_subsampling_rate_above
-
-        # call parent class __init__
-        super().__init__(*args, **kwargs)
-
-        # modify the delta function and point to model if VR is used
-        if self.mlda_variance_reduction:
-            self.model = model
-            self.delta_logp_factory = self.delta_logp
-            self.delta_logp = lambda q, q0: -self.delta_logp_factory(q0, q)
-
-    def reset_tuning(self):
-        """Skips resetting of tuned sampler parameters
-        and history to their initial values. Allows
-        continuation with the same settings when
-        DEMetropolisZMLDA steps are done in chunks
-        under MLDA."""
-        return
-
-    def astep(self, q0):
-
-        q_new, stats = super().astep(q0)
-
-        # Add variance reduction functionality.
-        if self.mlda_variance_reduction:
-            if stats[0]["accepted"]:
-                self.Q_last = self.model.Q.get_value()
-            if self.sub_counter == self.mlda_subsampling_rate_above:
-                self.sub_counter = 0
-            self.Q_reg[self.sub_counter] = self.Q_last
-            self.sub_counter += 1
-
-        return q_new, stats
-
-    def stop_tuning(self):
-        """At the end of the tuning phase, this method
-        removes the first x% of the history so future
-        proposals are not informed by unconverged tuning
-        iterations.
-        Runs only once after the end of tuning,
-        when the self.tuning_end_trigger flag is set to True.
-        """
-        if self.tuning_end_trigger:
-            self.tuning_end_trigger = False
-            return super().stop_tuning()
-        else:
-            return
-
-
-class MLDA(ArrayStepShared):
-    """
-    Multi-Level Delayed Acceptance (MLDA) sampling step that uses coarse
-    approximations of a fine model to construct proposals in multiple levels.
-
-    MLDA creates a hierarchy of MCMC chains. Chains sample from different
-    posteriors that ideally should be approximations of the fine (top-level)
-    posterior and require less computational effort to evaluate their likelihood.
-
-    Each chain runs for a fixed number of iterations (up to subsampling_rate) and
-    then the last sample generated is used as a proposal for the chain in the level
-    above (excluding when variance reduction is used, where a random sample from
-    the generated sequence is used). The bottom-level chain is a MetropolisMLDA
-    or DEMetropolisZMLDA sampler.
-
-    The algorithm achieves higher acceptance rates and effective sample sizes
-    than other samplers if the coarse models are sufficiently good approximations
-    of the fine one.
-
-    Parameters
-    ----------
-    coarse_models : list
-        List of coarse (multi-level) models, where the first model
-        is the coarsest one (level=0) and the last model is the
-        second finest one (level=L-1 where L is the number of levels).
-        Note this list excludes the model passed to the model
-        argument below, which is the finest available.
-    vars : list
-        List of value variables for sampler
-    base_sampler : string
-        Sampler used in the base (coarsest) chain. Can be 'Metropolis' or
-        'DEMetropolisZ'. Defaults to 'DEMetropolisZ'.
-    base_S : standard deviation of base proposal covariance matrix
-        Some measure of variance to parameterize base proposal distribution
-    base_proposal_dist : function
-        Function that returns zero-mean deviates when parameterized with
-        S (and n). Defaults to normal. This is the proposal used in the
-        coarsest (base) chain, i.e. level=0.
-    base_scaling : scalar or array
-        Initial scale factor for base proposal. Defaults to 1 if base_sampler
-        is 'Metropolis' and to 0.001 if base_sampler is 'DEMetropolisZ'.
-    tune : bool
-        Flag for tuning in the base proposal. If base_sampler is 'Metropolis' it
-        should be True or False and defaults to True. Note that
-        this is overridden by the tune parameter in sample(). For example when calling
-        step=MLDA(tune=False, ...) and then sample(step=step, tune=200, ...),
-        tuning will be activated for the first 200 steps. If base_sampler is
-        'DEMetropolisZ', it should be True. For 'DEMetropolisZ', there is a separate
-        argument base_tune_target which allows modifying the type of tuning.
-    base_tune_target: string
-        Defines the type of tuning that is performed when base_sampler is
-        'DEMetropolisZ'. Allowable values are 'lambda', 'scaling' or None and
-        it defaults to 'lambda'.
-    base_tune_interval : int
-        The frequency of tuning for the base proposal. Defaults to 100
-        iterations.
-    base_lamb : float
-        Lambda parameter of the base level DE proposal mechanism. Only applicable when
-        base_sampler is 'DEMetropolisZ'. Defaults to 2.38 / sqrt(2 * ndim)
-    base_tune_drop_fraction : float
-        Fraction of tuning steps that will be removed from the base level sampler's
-        history when the tuning ends. Only applicable when base_sampler is
-        'DEMetropolisZ'. Defaults to 0.9 - keeping the last 10% of tuning steps
-        for good mixing while removing 90% of potentially unconverged tuning positions.
-    model : PyMC Model
-        Optional model for sampling step. Defaults to None
-        (taken from context). This model should be the finest of all
-        multilevel models.
-    mode : string or `Mode` instance.
-        Compilation mode passed to Aesara functions
-    subsampling_rates : integer or list of integers
-        One integer for all levels or a list with one number for each level
-        (excluding the finest level).
-        This is the number of samples generated in level l-1 to
-        propose a sample for level l (applies to all levels excluding the
-        finest level). The length of the list needs to be the same as the
-        length of coarse_models.
-    base_blocked : bool
-        Flag to choose whether base sampler (level=0) is a
-        Compound MetropolisMLDA step (base_blocked=False)
-        or a blocked MetropolisMLDA step (base_blocked=True).
-        Only applicable when base_sampler='Metropolis'.
-    variance_reduction: bool
-        Calculate and store quantities of interest and quantity of interest
-        differences between levels to enable computing a variance-reduced
-        sum of the quantity of interest after sampling. In order to use
-        variance reduction, the user needs to do the following when defining
-        the PyMC model (also demonstrated in the example notebook):
-            - Include a `pm.MutableData()` variable with the name `Q` in the
-              model description of all levels.
-            - Use an Aesara Op to calculate the forward model (or the
-              combination of a forward model and a likelihood). This Op
-              should have a `perform()` method which (in addition to all
-              the other calculations), calculates the quantity of interest
-              and stores it to the variable `Q` of the PyMC model,
-              using the `set_value()` function.
-        When variance_reduction=True, all subchains run for a fixed number
-        of iterations (equal to subsampling_rates) and a random sample is
-        selected from the generated sequence (instead of the last sample
-        which is selected when variance_reduction=False).
-    store_Q_fine: bool
-        Store the values of the quantity of interest from the fine chain.
-    adaptive_error_model : bool
-        When True, MLDA will use the adaptive error model method
-        proposed in [Cui2012]. The method requires the likelihood of
-        the model to be adaptive and a forward model to be defined and
-        fed to the sampler. Thus, it only works when the user does
-        the following (also demonstrated in the example notebook):
-            - Include in the model definition at all levels,
-              the extra variable model_output, which will capture
-              the forward model outputs. Also include in the model
-              definition at all levels except the finest one, the
-              extra variables mu_B and Sigma_B, which will capture
-              the bias between different levels. All these variables
-              should be instantiated using the pm.MutableData method.
-            - Use an Aesara Op to define the forward model (and
-              optionally the likelihood) for all levels. The Op needs
-              to store the result of each forward model calculation
-              to the variable model_output of the PyMC model,
-              using the `set_value()` function.
-            - Define a Multivariate Normal likelihood (either using
-              the standard PyMC API or within an Op) which has mean
-              equal to the forward model output plus mu_B and covariance
-              equal to the model error plus Sigma_B.
-        Given the above, MLDA will capture and iteratively update the
-        bias terms internally for all level pairs and will correct
-        each level so that all levels' forward models aim to estimate
-        the finest level's forward model.
-
-    Examples
-    --------
-    .. code:: ipython
-
-        >>> import pymc as pm
-        ... import arviz as az
-        ... datum = 1
-        ...
-        ... with pm.Model() as coarse_model:
-        ...     x = pm.Normal("x", mu=0, sigma=10)
-        ...     y = pm.Normal("y", mu=x, sigma=1, observed=datum - 0.1)
-        ...
-        ... with pm.Model():
-        ...     x = pm.Normal("x", mu=0, sigma=10)
-        ...     y = pm.Normal("y", mu=x, sigma=1, observed=datum)
-        ...     step_method = pm.MLDA(coarse_models=[coarse_model],
-        ...                           subsampling_rates=5)
-        ...     idata = pm.sample(500, chains=2,
-        ...                       tune=100, step=step_method,
-        ...                       random_seed=123)
-        ...
-        ... az.summary(idata, kind="stats")
-           mean     sd  hdi_3%  hdi_97%
-        x  0.99  0.987  -0.734    2.992
-
-    References
-    ----------
-    .. [Dodwell2019] Dodwell, Tim & Ketelsen, Chris & Scheichl,
-        Robert & Teckentrup, Aretha. (2019).
-        Multilevel Markov Chain Monte Carlo.
-        SIAM Review. 61. 509-545.
-        `link `__
-    .. [Cui2012] Cui, Tiangang & Fox, Colin & O'Sullivan, Michael.
-        (2012). Adaptive Error Modelling in MCMC Sampling for Large
-        Scale Inverse Problems.
-    """
-
-    name = "mlda"
-
-    # All levels use block sampling,
-    # except level 0 where the user can choose
-    default_blocked = True
-    generates_stats = True
-
-    def __init__(
-        self,
-        coarse_models: List[Model],
-        vars: Optional[list] = None,
-        base_sampler="DEMetropolisZ",
-        base_S: Optional = None,
-        base_proposal_dist: Optional[Type[Proposal]] = None,
-        base_scaling: Optional = None,
-        tune: bool = True,
-        base_tune_target: str = "lambda",
-        base_tune_interval: int = 100,
-        base_lamb: Optional = None,
-        base_tune_drop_fraction: float = 0.9,
-        model: Optional[Model] = None,
-        mode: Optional = None,
-        subsampling_rates: List[int] = 5,
-        base_blocked: bool = False,
-        variance_reduction: bool = False,
-        store_Q_fine: bool = False,
-        adaptive_error_model: bool = False,
-        **kwargs,
-    ) -> None:
-        warnings.warn(
-            "The MLDA stepper will be migrated to the pymc-experimental project!"
-            " See https://github.com/pymc-devs/pymc/issues/5942",
-            DeprecationWarning,
-        )
-
-        # this variable is used to identify MLDA objects which are
-        # not in the finest level (i.e. child MLDA objects)
-        self.is_child = kwargs.get("is_child", False)
-
-        if not isinstance(coarse_models, list):
-            raise ValueError("MLDA step method cannot use coarse_models if it is not a list")
-        if len(coarse_models) == 0:
-            raise ValueError(
-                "MLDA step method was given an empty "
-                "list of coarse models. Give at least "
-                "one coarse model."
-            )
-
-        # assign internal state
-        model = pm.modelcontext(model)
-        initial_values = model.initial_point()
-        self.model = model
-        self.coarse_models = coarse_models
-        self.model_below = self.coarse_models[-1]
-        self.num_levels = len(self.coarse_models) + 1
-
-        # set up variance reduction.
-        self.variance_reduction = variance_reduction
-        self.store_Q_fine = store_Q_fine
-
-        # check that certain requirements hold
-        # for the variance reduction feature to work
-        if self.variance_reduction or self.store_Q_fine:
-            if not hasattr(self.model, "Q"):
-                raise AttributeError(
-                    "Model given to MLDA does not contain "
-                    "variable 'Q'. You need to include "
-                    "the variable in the model definition "
-                    "for variance reduction to work or "
-                    "for storing the fine Q. "
-                    "Use pm.MutableData() to define it."
-                )
-            if not isinstance(self.model.Q, TensorSharedVariable):
-                raise TypeError(
-                    "The variable 'Q' in the model definition is not of type "
-                    "'TensorSharedVariable'. Use pm.MutableData() to define the "
-                    "variable."
-                )
-
-        if self.is_child and self.variance_reduction:
-            # this is the subsampling rate applied to the current level
-            # it is stored in the level above and transferred here
-            self.subsampling_rate_above = kwargs.pop("subsampling_rate_above", None)
-
-        # set up adaptive error model
-        self.adaptive_error_model = adaptive_error_model
-
-        # check that certain requirements hold
-        # for the adaptive error model feature to work
-        if self.adaptive_error_model:
-            if not hasattr(self.model_below, "mu_B"):
-                raise AttributeError(
-                    "Model below in hierarchy does not contain "
-                    "variable 'mu_B'. You need to include "
-                    "the variable in the model definition "
-                    "for adaptive error model to work. "
-                    "Use pm.MutableData() to define it."
-                )
-            if not hasattr(self.model_below, "Sigma_B"):
-                raise AttributeError(
-                    "Model below in hierarchy does not contain "
-                    "variable 'Sigma_B'. You need to include "
-                    "the variable in the model definition "
-                    "for adaptive error model to work. "
-                    "Use pm.MutableData() to define it."
-                )
-            if not (
-                isinstance(self.model_below.mu_B, TensorSharedVariable)
-                and isinstance(self.model_below.Sigma_B, TensorSharedVariable)
-            ):
-                raise TypeError(
-                    "At least one of the variables 'mu_B' and 'Sigma_B' "
-                    "in the definition of the below model is not of type "
-                    "'TensorSharedVariable'. Use pm.MutableData() to define those "
-                    "variables."
-                )
-
-            # this object is used to recursively update the mean and
-            # variance of the bias correction given new differences
-            # between levels
-            self.bias = RecursiveSampleMoments(
-                self.model_below.mu_B.get_value(), self.model_below.Sigma_B.get_value()
-            )
-
-            # this list holds the bias objects from all levels
-            # it is gradually constructed when MLDA objects are
-            # created and then shared between all levels
-            self.bias_all = kwargs.pop("bias_all", None)
-            if self.bias_all is None:
-                self.bias_all = [self.bias]
-            else:
-                self.bias_all.append(self.bias)
-
-            # variables used for adaptive error model
-            self.last_synced_output_diff = None
-            self.adaptation_started = False
-
-        # set up subsampling rates.
-        if isinstance(subsampling_rates, int):
-            self.subsampling_rates = [subsampling_rates] * len(self.coarse_models)
-        else:
-            if len(subsampling_rates) != len(self.coarse_models):
-                raise ValueError(
-                    f"List of subsampling rates needs to have the same "
-                    f"length as list of coarse models but the lengths "
-                    f"were {len(subsampling_rates)}, {len(self.coarse_models)}"
-                )
-            self.subsampling_rates = subsampling_rates
-
-        self.subsampling_rate = self.subsampling_rates[-1]
-        self.subchain_selection = None
-
-        # set up base sampling
-        self.base_sampler = base_sampler
-
-        # VR is not compatible with compound base samplers so an automatic conversion
-        # to a block sampler happens here if needed
-        if self.variance_reduction and self.base_sampler == "Metropolis" and not base_blocked:
-            warnings.warn(
-                "Variance reduction is not compatible with non-blocked (compound) samplers. "
-                "Automatically switching to a blocked Metropolis sampler."
-            )
-            self.base_blocked = True
-        else:
-            self.base_blocked = base_blocked
-
-        self.base_S = base_S
-        self.base_proposal_dist = base_proposal_dist
-
-        if base_scaling is None:
-            if self.base_sampler == "Metropolis":
-                self.base_scaling = 1.0
-            else:
-                self.base_scaling = 0.001
-        else:
-            self.base_scaling = float(base_scaling)
-
-        self.tune = tune
-        if not self.tune and self.base_sampler == "DEMetropolisZ":
-            raise ValueError(
-                f"The argument tune was set to False while using"
-                f" a 'DEMetropolisZ' base sampler. 
'DEMetropolisZ' " - f" tune needs to be True." - ) - - self.base_tune_target = base_tune_target - self.base_tune_interval = base_tune_interval - self.base_lamb = base_lamb - self.base_tune_drop_fraction = float(base_tune_drop_fraction) - self.base_tuning_stats = None - - self.mode = mode - - # Process model variables - if vars is None: - vars = model.value_vars - else: - vars = [model.rvs_to_values.get(var, var) for var in vars] - vars = pm.inputvars(vars) - self.vars = vars - self.var_names = [var.name for var in self.vars] - - self.accepted = 0 - - # Construct Aesara function for current-level model likelihood - # (for use in acceptance) - shared = pm.make_shared_replacements(initial_values, vars, model) - self.delta_logp = delta_logp(initial_values, model.logp(), vars, shared) - - # Construct Aesara function for below-level model likelihood - # (for use in acceptance) - model_below = pm.modelcontext(self.model_below) - vars_below = [var for var in model_below.value_vars if var.name in self.var_names] - vars_below = pm.inputvars(vars_below) - shared_below = pm.make_shared_replacements(initial_values, vars_below, model_below) - self.delta_logp_below = delta_logp( - initial_values, model_below.logp(), vars_below, shared_below - ) - - super().__init__(vars, shared) - - # initialise complete step method hierarchy - if self.num_levels == 2: - with self.model_below: - # make sure the correct variables are selected from model_below - vars_below = [ - var for var in self.model_below.value_vars if var.name in self.var_names - ] - - # create kwargs - if self.variance_reduction: - base_kwargs = { - "mlda_subsampling_rate_above": self.subsampling_rate, - "mlda_variance_reduction": True, - } - else: - base_kwargs = {} - - if self.base_sampler == "Metropolis": - # MetropolisMLDA sampler in base level (level=0), targeting self.model_below - self.step_method_below = pm.MetropolisMLDA( - vars=vars_below, - proposal_dist=self.base_proposal_dist, - S=self.base_S, - scaling=self.base_scaling, - tune=self.tune, - tune_interval=self.base_tune_interval, - model=None, - mode=self.mode, - blocked=self.base_blocked, - **base_kwargs, - ) - else: - # DEMetropolisZMLDA sampler in base level (level=0), targeting self.model_below - self.step_method_below = pm.DEMetropolisZMLDA( - vars=vars_below, - S=self.base_S, - proposal_dist=self.base_proposal_dist, - lamb=self.base_lamb, - scaling=self.base_scaling, - tune=self.base_tune_target, - tune_interval=self.base_tune_interval, - tune_drop_fraction=self.base_tune_drop_fraction, - model=None, - mode=self.mode, - **base_kwargs, - ) - else: - # drop the last coarse model - coarse_models_below = self.coarse_models[:-1] - subsampling_rates_below = self.subsampling_rates[:-1] - - with self.model_below: - # make sure the correct variables are selected from model_below - vars_below = [ - var for var in self.model_below.value_vars if var.name in self.var_names - ] - - # create kwargs - if self.variance_reduction: - mlda_kwargs = { - "is_child": True, - "subsampling_rate_above": self.subsampling_rate, - } - else: - mlda_kwargs = {"is_child": True} - if self.adaptive_error_model: - mlda_kwargs = {**mlda_kwargs, **{"bias_all": self.bias_all}} - - # MLDA sampler in some intermediate level, targeting self.model_below - self.step_method_below = pm.MLDA( - vars=vars_below, - base_S=self.base_S, - base_sampler=self.base_sampler, - base_proposal_dist=self.base_proposal_dist, - base_scaling=self.base_scaling, - tune=self.tune, - base_tune_target=self.base_tune_target, - 
base_tune_interval=self.base_tune_interval, - base_lamb=self.base_lamb, - base_tune_drop_fraction=self.base_tune_drop_fraction, - model=None, - mode=self.mode, - subsampling_rates=subsampling_rates_below, - coarse_models=coarse_models_below, - base_blocked=self.base_blocked, - variance_reduction=self.variance_reduction, - store_Q_fine=False, - adaptive_error_model=self.adaptive_error_model, - **mlda_kwargs, - ) - - # instantiate the recursive DA proposal. - # this is the main proposal used for - # all levels (Recursive Delayed Acceptance) - # (except for level 0 where the step method is MetropolisMLDA - # or DEMetropolisZMLDA - not MLDA) - self.proposal_dist = RecursiveDAProposal( - self.step_method_below, self.model_below, self.tune, self.subsampling_rate - ) - - # set up data types of stats. - if isinstance(self.step_method_below, MLDA): - # get the stat types from the level below if that level is MLDA - self.stats_dtypes = self.step_method_below.stats_dtypes - - else: - # otherwise, set it up from scratch. - self.stats_dtypes = [{"accept": np.float64, "accepted": bool, "tune": bool}] - - if isinstance(self.step_method_below, MetropolisMLDA): - self.stats_dtypes.append({"base_scaling": np.float64}) - elif isinstance(self.step_method_below, DEMetropolisZMLDA): - self.stats_dtypes.append({"base_scaling": np.float64, "base_lambda": np.float64}) - elif isinstance(self.step_method_below, CompoundStep): - for method in self.step_method_below.methods: - if isinstance(method, MetropolisMLDA): - self.stats_dtypes.append({"base_scaling": np.float64}) - elif isinstance(method, DEMetropolisZMLDA): - self.stats_dtypes.append( - {"base_scaling": np.float64, "base_lambda": np.float64} - ) - - # initialise necessary variables for doing variance reduction - if self.variance_reduction: - self.sub_counter = 0 - self.Q_diff = [] - if self.is_child: - self.Q_reg = [np.nan] * self.subsampling_rate_above - if self.num_levels == 2: - self.Q_base_full = [] - if not self.is_child: - for level in range(self.num_levels - 1, 0, -1): - self.stats_dtypes[0][f"Q_{level}_{level - 1}"] = object - self.stats_dtypes[0]["Q_0"] = object - - # initialise necessary variables for doing variance reduction or storing fine Q - if self.variance_reduction or self.store_Q_fine: - self.Q_last = np.nan - self.Q_diff_last = np.nan - if self.store_Q_fine and not self.is_child: - self.stats_dtypes[0][f"Q_{self.num_levels - 1}"] = object - - def astep(self, q0: RaveledVars) -> Tuple[RaveledVars, List[Dict[str, Any]]]: - """One MLDA step, given current sample q0""" - # Check if the tuning flag has been changed and if yes, - # change the proposal's tuning flag and reset self.accepted - # This is triggered by _iter_sample while the highest-level MLDA step - # method is running. It then propagates to all levels. - if self.proposal_dist.tune != self.tune: - self.proposal_dist.tune = self.tune - # set tune in sub-methods of compound stepper explicitly because - # it is not set within sample.py (only the CompoundStep's tune flag is) - if isinstance(self.step_method_below, CompoundStep): - for method in self.step_method_below.methods: - method.tune = self.tune - self.accepted = 0 - - # Set subchain_selection (which sample from the coarse chain - # is passed as a proposal to the fine chain). If variance - # reduction is used, a random sample is selected as proposal. - # If variance reduction is not used, the last sample is - # selected as proposal. 
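-        # (For example, with subsampling_rate=5, variance reduction draws an
-        # index uniformly from {0, ..., 4}, whereas without it the proposal is
-        # always index 4, i.e. the final sample of the subchain.)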
- if self.variance_reduction: - self.subchain_selection = np.random.randint(0, self.subsampling_rate) - else: - self.subchain_selection = self.subsampling_rate - 1 - self.proposal_dist.subchain_selection = self.subchain_selection - - # Call the recursive DA proposal to get proposed sample - # and convert dict -> numpy array - q = self.proposal_dist(q0) - - # Evaluate MLDA acceptance log-ratio - # If proposed sample from lower levels is the same as current one, - # do not calculate likelihood, just set accept to 0.0 - if (q.data == q0.data).all(): - accept = np.float64(0.0) - skipped_logp = True - else: - # NB! The order and sign of the first term are swapped compared - # to the convention to make sure the proposal is evaluated last. - accept = -self.delta_logp(q0.data, q.data) + self.delta_logp_below(q0.data, q.data) - skipped_logp = False - - # Accept/reject sample - next sample is stored in q_new - q_new, accepted = metrop_select(accept, q, q0) - if skipped_logp: - accepted = False - - # if sample is accepted, update self.Q_last with the sample's Q value - # runs only for VR or when store_Q_fine is True - if self.variance_reduction or self.store_Q_fine: - if accepted and not skipped_logp: - self.Q_last = self.model.Q.get_value() - - # Variance reduction - if self.variance_reduction: - self.update_vr_variables(accepted, skipped_logp) - - # Adaptive error model - runs only during tuning. - if self.tune and self.adaptive_error_model: - self.update_error_estimate(accepted, skipped_logp) - - # Update acceptance counter - self.accepted += accepted - - stats = {"tune": self.tune, "accept": np.exp(accept), "accepted": accepted} - - # Save the VR statistics to the stats dictionary (only happens in the - # top MLDA level) - if (self.variance_reduction or self.store_Q_fine) and not self.is_child: - q_stats = {} - if self.variance_reduction: - m = self - for level in range(self.num_levels - 1, 0, -1): - # save the Q differences for this level and iteration - q_stats[f"Q_{level}_{level - 1}"] = np.array(m.Q_diff) - # this makes sure Q_diff is reset for - # the next iteration - m.Q_diff = [] - if level == 1: - break - m = m.step_method_below - q_stats["Q_0"] = np.array(m.Q_base_full) - m.Q_base_full = [] - if self.store_Q_fine: - q_stats["Q_" + str(self.num_levels - 1)] = np.array(self.Q_last) - stats = {**stats, **q_stats} - - # Capture the base tuning stats from the level below. - self.base_tuning_stats = [] - - if isinstance(self.step_method_below, MLDA): - self.base_tuning_stats = self.step_method_below.base_tuning_stats - elif isinstance(self.step_method_below, MetropolisMLDA): - self.base_tuning_stats.append({"base_scaling": np.mean(self.step_method_below.scaling)}) - elif isinstance(self.step_method_below, DEMetropolisZMLDA): - self.base_tuning_stats.append( - { - "base_scaling": np.mean(self.step_method_below.scaling), - "base_lambda": self.step_method_below.lamb, - } - ) - elif isinstance(self.step_method_below, CompoundStep): - # Below method is CompoundStep - for method in self.step_method_below.methods: - if isinstance(method, MetropolisMLDA): - self.base_tuning_stats.append({"base_scaling": np.mean(method.scaling)}) - elif isinstance(method, DEMetropolisZMLDA): - self.base_tuning_stats.append( - {"base_scaling": np.mean(method.scaling), "base_lambda": method.lamb} - ) - - return q_new, [stats] + self.base_tuning_stats - - def update_vr_variables(self, accepted, skipped_logp): - """Updates all the variables necessary for VR to work. 
-
-        Each level has a Q_last and Q_diff_last register which store
-        the Q of the last accepted MCMC sample and the difference
-        between the Q of the last accepted sample in this level and
-        the Q of the last sample in the level below.
-
-        These registers are updated here so that they can be exported later."""
-
-        # if this MLDA is not at the finest level, store Q_last in a
-        # register Q_reg and increase sub_counter (until you reach
-        # the subsampling rate, at which point you make it zero).
-        # Q_reg will later be used by the level above to calculate differences
-        if self.is_child:
-            if self.sub_counter == self.subsampling_rate_above:
-                self.sub_counter = 0
-            self.Q_reg[self.sub_counter] = self.Q_last
-            self.sub_counter += 1
-
-        # if MLDA is in the level above the base level, extract the
-        # latest set of Q values from Q_reg in the base level
-        # and add them to Q_base_full (which stores all the history of
-        # Q values from the base level)
-        if self.num_levels == 2:
-            self.Q_base_full.extend(self.step_method_below.Q_reg)
-
-        # if the sample is accepted, update Q_diff_last with the latest
-        # difference between the last Q of this level and the Q of the
-        # proposed (selected) sample from the level below.
-        # If sample is not accepted, just keep the latest accepted Q_diff
-        if accepted and not skipped_logp:
-            self.Q_diff_last = self.Q_last - self.step_method_below.Q_reg[self.subchain_selection]
-        # Add the last accepted Q_diff to the list
-        self.Q_diff.append(self.Q_diff_last)
-
-    def update_error_estimate(self, accepted, skipped_logp):
-        """Updates the adaptive error model estimate with
-        the latest accepted forward model output difference. Also
-        updates the model variables mu_B and Sigma_B.
-
-        The current level estimates and stores the error
-        model between the current level and the level below."""
-
-        # only save errors when a sample is accepted (excluding skipped_logp)
-        if accepted and not skipped_logp:
-            # this is the error (i.e. forward model output difference)
-            # between the current level's model and the model in the level below
-            self.last_synced_output_diff = (
-                self.model.model_output.get_value() - self.model_below.model_output.get_value()
-            )
-            self.adaptation_started = True
-
-        if self.adaptation_started:
-            # update the internal recursive bias estimator with the last saved error
-            self.bias.update(self.last_synced_output_diff)
-            # Update the model variables in the level below the current one.
-            # Each level has its own bias correction (i.e. bias object) that
-            # estimates the error between that level and the one below.
-            # The model variables mu_B and Sigma_B of a level are the
-            # sum of the bias corrections of all levels below and including
-            # that level. This sum is updated here.
-            with self.model_below:
-                pm.set_data(
-                    {
-                        "mu_B": sum(
-                            bias.get_mu()
-                            for bias in self.bias_all[: len(self.bias_all) - self.num_levels + 2]
-                        )
-                    }
-                )
-                pm.set_data(
-                    {
-                        "Sigma_B": sum(
-                            bias.get_sigma()
-                            for bias in self.bias_all[: len(self.bias_all) - self.num_levels + 2]
-                        )
-                    }
-                )
-
-    @staticmethod
-    def competence(var, has_grad):
-        """Return MLDA competence for given var/has_grad. MLDA currently works
-        only with continuous variables."""
-        if var.dtype in pm.discrete_types:
-            return Competence.INCOMPATIBLE
-        return Competence.COMPATIBLE
-
-
-class RecursiveSampleMoments:
-    """
-    Iteratively constructs a sample mean
-    and covariance, given input samples.
-
-    Used to capture an estimate of the mean
-    and covariance of the bias of an MLDA
-    coarse model.
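-
-    Given a new sample x at step t, the mean is updated as
-    mu_t = (t * mu_{t-1} + x) / (t + 1), and the covariance via the
-    matching rank-one recursion implemented in update() below.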
-    """
-
-    def __init__(self, mu_0, sigma_0, t=1):
-        self.mu = mu_0
-        self.sigma = sigma_0
-        self.t = t
-
-    def __call__(self):
-        return self.mu, self.sigma
-
-    def get_mu(self):
-        """Returns the current mu value"""
-        return self.mu
-
-    def get_sigma(self):
-        """Returns the current covariance value"""
-        return self.sigma
-
-    def update(self, x):
-        """Updates the mean and covariance given a
-        new sample x"""
-        mu_previous = self.mu.copy()
-
-        self.mu = (1 / (self.t + 1)) * (self.t * mu_previous + x)
-
-        self.sigma = (self.t - 1) / self.t * self.sigma + 1 / self.t * (
-            self.t * np.outer(mu_previous, mu_previous)
-            - (self.t + 1) * np.outer(self.mu, self.mu)
-            + np.outer(x, x)
-        )
-
-        self.t += 1
-
-
-def extract_Q_estimate(trace, levels):
-    """
-    Returns expectation and standard error of quantity of interest,
-    given a trace and the number of levels in the multilevel model.
-    It makes use of the collapsing sum formula. Only applicable when
-    MLDA with variance reduction has been used for sampling.
-    """
-
-    Q_0_raw = trace.get_sampler_stats("Q_0").squeeze()
-    Q_0 = np.concatenate(Q_0_raw)[None, ::]
-    ess_Q_0 = az.ess(np.array(Q_0, np.float64))
-    Q_0_var = Q_0.var() / ess_Q_0
-
-    Q_diff_means = []
-    Q_diff_vars = []
-    for l in range(1, levels):
-        Q_diff_raw = trace.get_sampler_stats(f"Q_{l}_{l-1}").squeeze()
-        Q_diff = np.hstack(Q_diff_raw)[None, ::]
-        ess_diff = az.ess(np.array(Q_diff, np.float64))
-        Q_diff_means.append(Q_diff.mean())
-        Q_diff_vars.append(Q_diff.var() / ess_diff)
-
-    Q_mean = Q_0.mean() + sum(Q_diff_means)
-    Q_se = np.sqrt(Q_0_var + sum(Q_diff_vars))
-    return Q_mean, Q_se
-
-
-def subsample(
-    draws=1,
-    step=None,
-    start=None,
-    trace=None,
-    tune=0,
-    model=None,
-):
-    """
-    A stripped-down version of sample(), called only by the
-    RecursiveDAProposal (the proposal used in the MLDA stepper).
-    RecursiveDAProposal only requires a small subset of the input
-    parameters and checks normally performed by sample(), so this
-    function skips most of that code. It directly calls
-    _iter_sample(), rather than sample_many(), which reduces
-    overhead when running multiple levels in MLDA.
-    """
-
-    model = pm.modelcontext(model)
-    chain = 0
-    random_seed = np.random.randint(2**30)
-    callback = None
-
-    draws += tune
-
-    sampling = pm.sampling._iter_sample(
-        draws, step, start, trace, chain, tune, model, random_seed, callback
-    )
-
-    try:
-        for it, (trace, _) in enumerate(sampling):
-            pass
-    except KeyboardInterrupt:
-        pass
-
-    return trace
-
-
-# Available proposal distributions for MLDA
-
-
-class RecursiveDAProposal(Proposal):
-    """
-    Recursive Delayed Acceptance proposal to be used with MLDA step sampler.
-    Recursively calls an MLDA sampler if level > 0 and calls MetropolisMLDA or
-    DEMetropolisZMLDA sampler if level = 0. The sampler generates
-    self.subsampling_rate samples and returns the sample with index
-    self.subchain_selection to be used as a proposal.
-    Results in a hierarchy of chains each of which is used to propose
-    samples to the chain above.
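-
-    For example, with two coarse models and subsampling_rate=5, each
-    top-level MLDA proposal triggers 5 steps of the intermediate MLDA
-    chain, each of which in turn triggers 5 steps of the base sampler.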
- """ - - def __init__( - self, - step_method_below: Union[MLDA, MetropolisMLDA, DEMetropolisZMLDA, CompoundStep], - model_below: Model, - tune: bool, - subsampling_rate: int, - ) -> None: - - self.step_method_below = step_method_below - self.model_below = model_below - self.tune = tune - self.subsampling_rate = subsampling_rate - self.subchain_selection = None - self.tuning_end_trigger = True - - def __call__(self, q0: RaveledVars) -> RaveledVars: - """Returns proposed sample given the current sample - in dictionary form (q0_dict).""" - - # Logging is reduced to avoid extensive console output - # during multiple recursive calls of subsample() - _log = logging.getLogger("pymc") - _log.setLevel(logging.ERROR) - - # Convert current sample from RaveledVars -> - # dict before feeding to subsample. - q0_dict = DictToArrayBijection.rmap(q0) - - with self.model_below: - # Check if the tuning flag has been set to False - # in which case tuning is stopped. The flag is set - # to False (by MLDA's astep) when the burn-in - # iterations of the highest-level MLDA sampler run out. - # The change propagates to all levels. - - if self.tune: - # Subsample in tuning mode - trace = subsample( - draws=0, - step=self.step_method_below, - start=q0_dict, - tune=self.subsampling_rate, - ) - else: - # Subsample in normal mode without tuning - # If DEMetropolisZMLDA is the base sampler a flag is raised to - # make sure that history is edited after tuning ends - if self.tuning_end_trigger: - if isinstance(self.step_method_below, DEMetropolisZMLDA): - self.step_method_below.tuning_end_trigger = True - self.tuning_end_trigger = False - - trace = subsample( - draws=self.subsampling_rate, - step=self.step_method_below, - start=q0_dict, - tune=0, - ) - - # set logging back to normal - _log.setLevel(logging.NOTSET) - - # return sample with index self.subchain_selection from the generated - # sequence of length self.subsampling_rate. The index is set within - # MLDA's astep() function - q_dict = trace.point(self.subchain_selection) - - # Make sure output dict is ordered the same way as the input dict. 
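-        # (DictToArrayBijection.map below relies on a consistent key order,
-        # so the dict is re-keyed using q0_dict before mapping back to an
-        # array.)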
- q_dict = Point( - {key: q_dict[key] for key in q0_dict.keys()}, - model=self.model_below, - filter_model_vars=True, - ) - - return DictToArrayBijection.map(q_dict) diff --git a/pymc/tests/test_step.py b/pymc/tests/test_step.py index 49abd01e86..8cf809069c 100644 --- a/pymc/tests/test_step.py +++ b/pymc/tests/test_step.py @@ -24,12 +24,10 @@ import pytest from aesara.compile.ops import as_op -from aesara.graph.op import Op import pymc as pm from pymc.aesaraf import floatX -from pymc.data import Data, MutableData from pymc.distributions import ( Bernoulli, Beta, @@ -38,14 +36,12 @@ Dirichlet, HalfNormal, Multinomial, - MvNormal, Normal, ) from pymc.exceptions import SamplingError -from pymc.model import Model, Potential, set_data +from pymc.model import Model from pymc.sampling import assign_step_methods, sample from pymc.step_methods import ( - MLDA, NUTS, BinaryGibbsMetropolis, CategoricalGibbsMetropolis, @@ -56,18 +52,14 @@ Metropolis, MultivariateNormalProposal, NormalProposal, - RecursiveDAProposal, Slice, - UniformProposal, ) -from pymc.step_methods.mlda import extract_Q_estimate from pymc.tests.checks import close_to from pymc.tests.helpers import fast_unstable_sampling_mode from pymc.tests.models import ( mv_simple, mv_simple_coarse, mv_simple_discrete, - mv_simple_very_coarse, simple_2model_continuous, simple_categorical, ) @@ -111,15 +103,6 @@ def check_stat_dtype(self, step, idata): ), 1000, ), - # MLDA takes 1/2 of the total test time! - ( - lambda C, model_coarse: MLDA( - coarse_models=[model_coarse], - base_S=C, - base_proposal_dist=MultivariateNormalProposal, - ), - 1000, - ), (lambda *_: Slice(), 2000), (lambda *_: Slice(blocked=True), 2000), ( @@ -662,706 +645,6 @@ def test_sampler_stats(self): assert (trace.model_logp == model_logp_).all() -class TestMLDA: - steppers = [MLDA] - - def test_proposal_and_base_proposal_choice(self): - """Test that proposal_dist and base_proposal_dist are set as - expected by MLDA""" - _, model, _ = mv_simple() - _, model_coarse, _ = mv_simple_coarse() - with model: - sampler = MLDA(coarse_models=[model_coarse], base_sampler="Metropolis") - assert isinstance(sampler.proposal_dist, RecursiveDAProposal) - assert sampler.base_proposal_dist is None - assert isinstance(sampler.step_method_below.proposal_dist, NormalProposal) - - sampler = MLDA(coarse_models=[model_coarse]) - assert isinstance(sampler.proposal_dist, RecursiveDAProposal) - assert sampler.base_proposal_dist is None - assert isinstance(sampler.step_method_below.proposal_dist, UniformProposal) - - initial_point = model.initial_point() - initial_point_size = sum(initial_point[n.name].size for n in model.value_vars) - s = np.ones(initial_point_size) - sampler = MLDA(coarse_models=[model_coarse], base_sampler="Metropolis", base_S=s) - assert isinstance(sampler.proposal_dist, RecursiveDAProposal) - assert sampler.base_proposal_dist is None - assert isinstance(sampler.step_method_below.proposal_dist, NormalProposal) - - sampler = MLDA(coarse_models=[model_coarse], base_S=s) - assert isinstance(sampler.proposal_dist, RecursiveDAProposal) - assert sampler.base_proposal_dist is None - assert isinstance(sampler.step_method_below.proposal_dist, UniformProposal) - - s = np.diag(s) - sampler = MLDA(coarse_models=[model_coarse], base_sampler="Metropolis", base_S=s) - assert isinstance(sampler.proposal_dist, RecursiveDAProposal) - assert sampler.base_proposal_dist is None - assert isinstance(sampler.step_method_below.proposal_dist, MultivariateNormalProposal) - - sampler = 
MLDA(coarse_models=[model_coarse], base_S=s)
-            assert isinstance(sampler.proposal_dist, RecursiveDAProposal)
-            assert sampler.base_proposal_dist is None
-            assert isinstance(sampler.step_method_below.proposal_dist, UniformProposal)
-
-            s[0, 0] = -s[0, 0]
-            with pytest.raises(np.linalg.LinAlgError):
-                MLDA(coarse_models=[model_coarse], base_sampler="Metropolis", base_S=s)
-
-    def test_step_methods_in_each_level(self):
-        """Test that MLDA creates the correct hierarchy of step methods when no
-        coarse models are passed and when two coarse models are passed."""
-        _, model, _ = mv_simple()
-        _, model_coarse, _ = mv_simple_coarse()
-        _, model_very_coarse, _ = mv_simple_very_coarse()
-        with model:
-            initial_point = model.initial_point()
-            initial_point_size = sum(initial_point[n.name].size for n in model.value_vars)
-            s = np.ones(initial_point_size) + 2.0
-            sampler = MLDA(
-                coarse_models=[model_very_coarse, model_coarse],
-                base_S=s,
-                base_sampler="Metropolis",
-            )
-            assert isinstance(sampler.step_method_below, MLDA)
-            assert isinstance(sampler.step_method_below.step_method_below, Metropolis)
-            assert np.all(sampler.step_method_below.step_method_below.proposal_dist.s == s)
-
-            sampler = MLDA(coarse_models=[model_very_coarse, model_coarse], base_S=s)
-            assert isinstance(sampler.step_method_below, MLDA)
-            assert isinstance(sampler.step_method_below.step_method_below, DEMetropolisZ)
-            assert np.all(sampler.step_method_below.step_method_below.proposal_dist.s == s)
-
-    def test_exceptions_coarse_models(self):
-        """Test that MLDA generates the expected exceptions when no coarse_models arg
-        is passed, an empty list is passed or when coarse_models is not a list"""
-        with pytest.raises(TypeError):
-            _, model, _ = mv_simple()
-            with model:
-                MLDA()
-
-        with pytest.raises(ValueError):
-            _, model, _ = mv_simple()
-            with model:
-                MLDA(coarse_models=[])
-
-        with pytest.raises(ValueError):
-            _, model, _ = mv_simple()
-            with model:
-                MLDA(coarse_models=(model, model))
-
-    def test_nonparallelized_chains_are_random(self):
-        """Test that parallel chains are not identical when no parallelisation
-        is applied"""
-        with Model() as coarse_model:
-            Normal("x", 0.3, 1)
-
-        with Model():
-            Normal("x", 0, 1)
-            for stepper in TestMLDA.steppers:
-                step = stepper(coarse_models=[coarse_model])
-                idata = sample(chains=2, cores=1, draws=20, tune=0, step=step, random_seed=1)
-                samples = idata.posterior["x"].values[:, 5]
-                assert len(set(samples)) == 2, f"Non parallelized {stepper} chains are identical."
-
-    def test_parallelized_chains_are_random(self):
-        """Test that parallel chains are not identical when parallelisation
-        is applied"""
-        with Model() as coarse_model:
-            Normal("x", 0.3, 1)
-
-        with Model():
-            Normal("x", 0, 1)
-            for stepper in TestMLDA.steppers:
-                step = stepper(coarse_models=[coarse_model])
-                idata = sample(chains=2, cores=2, draws=20, tune=0, step=step, random_seed=1)
-                samples = idata.posterior["x"].values[:, 5]
-                assert len(set(samples)) == 2, f"Parallelized {stepper} chains are identical."
-
-    def test_acceptance_rate_against_coarseness(self):
-        """Test that the acceptance rate increases
-        when the coarse model is closer to
-        the fine model."""
-        with Model() as coarse_model_0:
-            Normal("x", 5.0, 1.0)
-
-        with Model() as coarse_model_1:
-            Normal("x", 6.0, 2.0)
-
-        with Model() as coarse_model_2:
-            Normal("x", 20.0, 5.0)
-
-        possible_coarse_models = [coarse_model_0, coarse_model_1, coarse_model_2]
-        acc = []
-
-        with Model():
-            Normal("x", 5.0, 1.0)
-            for coarse_model in possible_coarse_models:
-                step = MLDA(coarse_models=[coarse_model], subsampling_rates=3)
-                idata = sample(chains=1, draws=500, tune=100, step=step, random_seed=1)
-                acc.append(idata.sample_stats["accepted"].mean())
-            assert acc[0] > acc[1] > acc[2], (
-                "Acceptance rate is not "
-                "strictly increasing when "
-                "coarse model is closer to "
-                "fine model. Acceptance rates "
-                "were: {}".format(acc)
-            )
-
-    def test_mlda_non_blocked(self):
-        """Test that MLDA correctly creates non-blocked
-        compound steps in level 0 when using a Metropolis
-        base sampler."""
-        _, model = simple_2model_continuous()
-        _, model_coarse = simple_2model_continuous()
-        with model:
-            for stepper in self.steppers:
-                assert isinstance(
-                    stepper(
-                        coarse_models=[model_coarse],
-                        base_sampler="Metropolis",
-                        base_blocked=False,
-                    ).step_method_below,
-                    CompoundStep,
-                )
-
-    def test_mlda_blocked(self):
-        """Test the type of base sampler instantiated
-        when switching base_blocked flag while
-        the base sampler is Metropolis and when
-        the base sampler is DEMetropolisZ."""
-        _, model = simple_2model_continuous()
-        _, model_coarse = simple_2model_continuous()
-        with model:
-            for stepper in self.steppers:
-                assert not isinstance(
-                    stepper(
-                        coarse_models=[model_coarse],
-                        base_sampler="Metropolis",
-                        base_blocked=True,
-                    ).step_method_below,
-                    CompoundStep,
-                )
-                assert isinstance(
-                    stepper(
-                        coarse_models=[model_coarse],
-                        base_sampler="Metropolis",
-                        base_blocked=True,
-                    ).step_method_below,
-                    Metropolis,
-                )
-                assert isinstance(
-                    stepper(coarse_models=[model_coarse]).step_method_below,
-                    DEMetropolisZ,
-                )
-
-    def test_tuning_and_scaling_on(self):
-        """Test that tune and base_scaling change as expected when
-        tuning is on."""
-        np.random.seed(1234)
-        ts = 100
-        _, model = simple_2model_continuous()
-        _, model_coarse = simple_2model_continuous()
-        with model:
-            trace_0 = sample(
-                tune=ts,
-                draws=20,
-                step=MLDA(
-                    coarse_models=[model_coarse],
-                    base_sampler="Metropolis",
-                    base_tune_interval=50,
-                    base_scaling=100.0,
-                ),
-                chains=1,
-                discard_tuned_samples=False,
-                random_seed=1234,
-                return_inferencedata=False,
-            )
-
-            trace_1 = sample(
-                tune=ts,
-                draws=20,
-                step=MLDA(
-                    coarse_models=[model_coarse],
-                    base_tune_target="scaling",
-                    base_tune_interval=50,
-                    base_scaling=100.0,
-                ),
-                chains=1,
-                discard_tuned_samples=False,
-                random_seed=1234,
-                return_inferencedata=False,
-            )
-
-            trace_2 = sample(
-                tune=ts,
-                draws=20,
-                step=MLDA(
-                    coarse_models=[model_coarse],
-                    base_tune_interval=50,
-                    base_scaling=10,
-                    base_lamb=100.0,
-                ),
-                chains=1,
-                discard_tuned_samples=False,
-                random_seed=1234,
-                return_inferencedata=False,
-            )
-
-            assert trace_0.get_sampler_stats("tune", chains=0)[0]
-            assert trace_0.get_sampler_stats("tune", chains=0)[ts - 1]
-            assert not trace_0.get_sampler_stats("tune", chains=0)[ts]
-            assert not trace_0.get_sampler_stats("tune", chains=0)[-1]
-            assert trace_0.get_sampler_stats("base_scaling", chains=0)[0, 0] == 100.0
-            assert trace_0.get_sampler_stats("base_scaling", chains=0)[0, 1] == 100.0
-            assert 
trace_0.get_sampler_stats("base_scaling", chains=0)[-1, 0] < 100.0 - assert trace_0.get_sampler_stats("base_scaling", chains=0)[-1, 1] < 100.0 - - assert trace_1.get_sampler_stats("tune", chains=0)[0] - assert trace_1.get_sampler_stats("tune", chains=0)[ts - 1] - assert not trace_1.get_sampler_stats("tune", chains=0)[ts] - assert not trace_1.get_sampler_stats("tune", chains=0)[-1] - assert trace_1.get_sampler_stats("base_scaling", chains=0)[0] == 100.0 - assert trace_1.get_sampler_stats("base_scaling", chains=0)[-1] < 100.0 - - assert trace_2.get_sampler_stats("tune", chains=0)[0] - assert trace_2.get_sampler_stats("tune", chains=0)[ts - 1] - assert not trace_2.get_sampler_stats("tune", chains=0)[ts] - assert not trace_2.get_sampler_stats("tune", chains=0)[-1] - assert trace_2.get_sampler_stats("base_lambda", chains=0)[0] == 100.0 - assert trace_2.get_sampler_stats("base_lambda", chains=0)[-1] < 100.0 - - def test_tuning_and_scaling_off(self): - """Test that tuning is deactivated when sample()'s tune=0 and that - MLDA's tune=False is overridden by sample()'s tune.""" - np.random.seed(12345) - _, model = simple_2model_continuous() - _, model_coarse = simple_2model_continuous() - - ts_0 = 0 - with model: - trace_0 = sample( - tune=ts_0, - draws=100, - step=MLDA( - coarse_models=[model_coarse], - base_sampler="Metropolis", - base_tune_interval=50, - base_scaling=100.0, - tune=False, - ), - chains=1, - discard_tuned_samples=False, - random_seed=12345, - return_inferencedata=False, - ) - - ts_1 = 100 - with model: - trace_1 = sample( - tune=ts_1, - draws=20, - step=MLDA( - coarse_models=[model_coarse], - base_sampler="Metropolis", - base_tune_interval=50, - base_scaling=100.0, - tune=False, - ), - chains=1, - discard_tuned_samples=False, - random_seed=12345, - return_inferencedata=False, - ) - - assert not trace_0.get_sampler_stats("tune", chains=0)[0] - assert not trace_0.get_sampler_stats("tune", chains=0)[-1] - assert ( - trace_0.get_sampler_stats("base_scaling", chains=0)[0, 0] - == trace_0.get_sampler_stats("base_scaling", chains=0)[-1, 0] - == trace_0.get_sampler_stats("base_scaling", chains=0)[0, 1] - == trace_0.get_sampler_stats("base_scaling", chains=0)[-1, 1] - == 100.0 - ) - - assert trace_1.get_sampler_stats("tune", chains=0)[0] - assert trace_1.get_sampler_stats("tune", chains=0)[ts_1 - 1] - assert not trace_1.get_sampler_stats("tune", chains=0)[ts_1] - assert not trace_1.get_sampler_stats("tune", chains=0)[-1] - assert trace_1.get_sampler_stats("base_scaling", chains=0)[0, 0] == 100.0 - assert trace_1.get_sampler_stats("base_scaling", chains=0)[0, 1] == 100.0 - assert trace_1.get_sampler_stats("base_scaling", chains=0)[-1, 0] < 100.0 - assert trace_1.get_sampler_stats("base_scaling", chains=0)[-1, 1] < 100.0 - - ts_2 = 0 - with model: - trace_2 = sample( - tune=ts_2, - draws=100, - step=MLDA( - coarse_models=[model_coarse], - base_tune_interval=50, - base_lamb=100.0, - base_tune_target=None, - ), - chains=1, - discard_tuned_samples=False, - random_seed=12345, - return_inferencedata=False, - ) - - assert not trace_2.get_sampler_stats("tune", chains=0)[0] - assert not trace_2.get_sampler_stats("tune", chains=0)[-1] - assert ( - trace_2.get_sampler_stats("base_lambda", chains=0)[0] - == trace_2.get_sampler_stats("base_lambda", chains=0)[-1] - == trace_2.get_sampler_stats("base_lambda", chains=0)[0] - == trace_2.get_sampler_stats("base_lambda", chains=0)[-1] - == 100.0 - ) - - def test_trace_length(self): - """Check if trace length is as expected.""" - tune = 100 - draws = 50 - with 
Model() as coarse_model: - Normal("n", 0, 2.2, size=(3,)) - with Model(): - Normal("n", 0, 2, size=(3,)) - step = MLDA(coarse_models=[coarse_model]) - idata = sample(tune=tune, draws=draws, step=step, chains=1, discard_tuned_samples=False) - assert len(idata.warmup_posterior.draw) == tune - assert len(idata.posterior.draw) == draws - - @pytest.mark.parametrize( - "variable,has_grad,outcome", - [("n", True, 1), ("n", False, 1), ("b", True, 0), ("b", False, 0)], - ) - def test_competence(self, variable, has_grad, outcome): - """Test if competence function returns expected - results for different models""" - with Model() as pmodel: - Normal("n", 0, 2, size=(3,)) - Binomial("b", n=2, p=0.3) - assert MLDA.competence(pmodel[variable], has_grad=has_grad) == outcome - - def test_multiple_subsampling_rates(self): - """Test that when you give a single integer it is applied to all levels and - when you give a list the list is applied correctly.""" - with Model() as coarse_model_0: - Normal("n", 0, 2.2, size=(3,)) - with Model() as coarse_model_1: - Normal("n", 0, 2.1, size=(3,)) - with Model(): - Normal("n", 0, 2.0, size=(3,)) - - step_1 = MLDA(coarse_models=[coarse_model_0, coarse_model_1], subsampling_rates=3) - assert len(step_1.subsampling_rates) == 2 - assert step_1.subsampling_rates[0] == step_1.subsampling_rates[1] == 3 - - step_2 = MLDA(coarse_models=[coarse_model_0, coarse_model_1], subsampling_rates=[3, 4]) - assert step_2.subsampling_rates[0] == 3 - assert step_2.subsampling_rates[1] == 4 - - with pytest.raises(ValueError): - step_3 = MLDA( - coarse_models=[coarse_model_0, coarse_model_1], - subsampling_rates=[3, 4, 10], - ) - - def test_aem_mu_sigma(self): - """Test that AEM estimates mu_B and Sigma_B in - the coarse models of a 3-level LR example correctly""" - # create data for linear regression - if aesara.config.floatX == "float32": - p = "float32" - else: - p = "float64" - np.random.seed(123456) - size = 200 - true_intercept = 1 - true_slope = 2 - sigma = 1 - x = np.linspace(0, 1, size, dtype=p) - # y = a + b*x - true_regression_line = true_intercept + true_slope * x - # add noise - y = true_regression_line + np.random.normal(0, sigma**2, size) - s = np.identity(y.shape[0], dtype=p) - np.fill_diagonal(s, sigma**2) - - # forward model Op - here, just the regression equation - class ForwardModel(Op): - if aesara.config.floatX == "float32": - itypes = [at.fvector] - otypes = [at.fvector] - else: - itypes = [at.dvector] - otypes = [at.dvector] - - def __init__(self, x, pymc_model): - self.x = x - self.pymc_model = pymc_model - - def perform(self, node, inputs, outputs): - intercept = inputs[0][0] - x_coeff = inputs[0][1] - - temp = intercept + x_coeff * x + self.pymc_model.bias.data - with self.pymc_model: - set_data({"model_output": temp}) - outputs[0][0] = np.array(temp) - - # create the coarse models with separate biases - mout = [] - coarse_models = [] - - with Model() as coarse_model_0: - bias = Data("bias", 3.5 * np.ones(y.shape, dtype=p)) - mu_B = MutableData("mu_B", -1.3 * np.ones(y.shape, dtype=p)) - Sigma_B = MutableData("Sigma_B", np.zeros((y.shape[0], y.shape[0]), dtype=p)) - model_output = MutableData("model_output", np.zeros(y.shape, dtype=p)) - Sigma_e = Data("Sigma_e", s) - - # Define priors - intercept = Normal("Intercept", 0, sigma=20) - x_coeff = Normal("x", 0, sigma=20) - - theta = at.as_tensor_variable([intercept, x_coeff]) - - mout.append(ForwardModel(x, coarse_model_0)) - - # Define likelihood - likelihood = MvNormal("y", mu=mout[0](theta) + mu_B, cov=Sigma_e, 
observed=y)
-
-            coarse_models.append(coarse_model_0)
-
-        with Model() as coarse_model_1:
-            bias = Data("bias", 2.2 * np.ones(y.shape, dtype=p))
-            mu_B = MutableData("mu_B", -2.2 * np.ones(y.shape, dtype=p))
-            Sigma_B = MutableData("Sigma_B", np.zeros((y.shape[0], y.shape[0]), dtype=p))
-            model_output = MutableData("model_output", np.zeros(y.shape, dtype=p))
-            Sigma_e = Data("Sigma_e", s)
-
-            # Define priors
-            intercept = Normal("Intercept", 0, sigma=20)
-            x_coeff = Normal("x", 0, sigma=20)
-
-            theta = at.as_tensor_variable([intercept, x_coeff])
-
-            mout.append(ForwardModel(x, coarse_model_1))
-
-            # Define likelihood
-            likelihood = MvNormal("y", mu=mout[1](theta) + mu_B, cov=Sigma_e, observed=y)
-
-            coarse_models.append(coarse_model_1)
-
-        # fine model and inference
-        with Model() as model:
-            bias = Data("bias", np.zeros(y.shape, dtype=p))
-            model_output = MutableData("model_output", np.zeros(y.shape, dtype=p))
-            Sigma_e = Data("Sigma_e", s)
-
-            # Define priors
-            intercept = Normal("Intercept", 0, sigma=20)
-            x_coeff = Normal("x", 0, sigma=20)
-
-            theta = at.as_tensor_variable([intercept, x_coeff])
-
-            mout.append(ForwardModel(x, model))
-
-            # Define likelihood
-            likelihood = MvNormal("y", mu=mout[-1](theta), cov=Sigma_e, observed=y)
-
-            step_mlda = MLDA(coarse_models=coarse_models, adaptive_error_model=True)
-
-            trace_mlda = sample(
-                draws=100,
-                step=step_mlda,
-                chains=1,
-                tune=200,
-                discard_tuned_samples=True,
-                random_seed=84759238,
-            )
-
-            m0 = step_mlda.step_method_below.model_below.mu_B.get_value()
-            s0 = step_mlda.step_method_below.model_below.Sigma_B.get_value()
-            m1 = step_mlda.model_below.mu_B.get_value()
-            s1 = step_mlda.model_below.Sigma_B.get_value()
-
-            assert np.allclose(m0, -3.5)
-            assert np.allclose(m1, -2.2)
-            assert np.allclose(s0, 0, atol=1e-3)
-            assert np.allclose(s1, 0, atol=1e-3)
-
-    def test_variance_reduction(self):
-        """
-        Test if the right stats are output when variance reduction is used in MLDA,
-        if the output estimates are close (VR estimate vs. standard estimate from
-        the first chain) and if the variance of VR is lower. Uses a linear regression
-        model with multiple levels where approximate levels have fewer data.
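-
-        The variance-reduced estimate relies on the collapsing-sum identity
-        E[Q_fine] = E[Q_0] + sum_l E[Q_l - Q_{l-1}], which is what
-        extract_Q_estimate() computes from the sampler stats.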
-
-        """
-        # arithmetic precision
-        if aesara.config.floatX == "float32":
-            p = "float32"
-        else:
-            p = "float64"
-
-        # set up the model and data
-        seed = 12345
-        np.random.seed(seed)
-        size = 100
-        true_intercept = 1
-        true_slope = 2
-        sigma = 0.1
-        x = np.linspace(0, 1, size, dtype=p)
-        # y = a + b*x
-        true_regression_line = true_intercept + true_slope * x
-        # add noise
-        y = true_regression_line + np.random.normal(0, sigma**2, size)
-        s = sigma
-
-        x_coarse_0 = x[::3]
-        y_coarse_0 = y[::3]
-        x_coarse_1 = x[::2]
-        y_coarse_1 = y[::2]
-
-        # MCMC parameters
-        ndraws = 200
-        ntune = 100
-        nsub = 3
-        nchains = 1
-
-        # define likelihoods with different Q
-        class Likelihood(Op):
-            if aesara.config.floatX == "float32":
-                itypes = [at.fvector]
-                otypes = [at.fscalar]
-            else:
-                itypes = [at.dvector]
-                otypes = [at.dscalar]
-
-            def __init__(self, x, y, pymc_model):
-                self.x = x
-                self.y = y
-                self.pymc_model = pymc_model
-
-            def perform(self, node, inputs, outputs):
-                intercept = inputs[0][0]
-                x_coeff = inputs[0][1]
-
-                temp = np.array(intercept + x_coeff * self.x, dtype=p)
-                with self.pymc_model:
-                    set_data({"Q": np.array(x_coeff, dtype=p)})
-                outputs[0][0] = np.array(
-                    -(0.5 / s**2) * np.sum((temp - self.y) ** 2, dtype=p), dtype=p
-                )
-
-        # run one MLDA stepper for each base sampler
-        for stepper in ["Metropolis", "DEMetropolisZ"]:
-            mout = []
-            coarse_models = []
-
-            with Model() as coarse_model_0:
-                if aesara.config.floatX == "float32":
-                    Q = MutableData("Q", np.float32(0.0))
-                else:
-                    Q = MutableData("Q", np.float64(0.0))
-
-                # Define priors
-                intercept = Normal("Intercept", true_intercept, sigma=1)
-                x_coeff = Normal("x", true_slope, sigma=1)
-
-                theta = at.as_tensor_variable([intercept, x_coeff])
-
-                mout.append(Likelihood(x_coarse_0, y_coarse_0, coarse_model_0))
-                Potential("likelihood", mout[0](theta))
-
-                coarse_models.append(coarse_model_0)
-
-            with Model() as coarse_model_1:
-                if aesara.config.floatX == "float32":
-                    Q = MutableData("Q", np.float32(0.0))
-                else:
-                    Q = MutableData("Q", np.float64(0.0))
-
-                # Define priors
-                intercept = Normal("Intercept", true_intercept, sigma=1)
-                x_coeff = Normal("x", true_slope, sigma=1)
-
-                theta = at.as_tensor_variable([intercept, x_coeff])
-
-                mout.append(Likelihood(x_coarse_1, y_coarse_1, coarse_model_1))
-                Potential("likelihood", mout[1](theta))
-
-                coarse_models.append(coarse_model_1)
-
-            with Model() as model:
-                if aesara.config.floatX == "float32":
-                    Q = MutableData("Q", np.float32(0.0))
-                else:
-                    Q = MutableData("Q", np.float64(0.0))
-
-                # Define priors
-                intercept = Normal("Intercept", true_intercept, sigma=1)
-                x_coeff = Normal("x", true_slope, sigma=1)
-
-                theta = at.as_tensor_variable([intercept, x_coeff])
-
-                mout.append(Likelihood(x, y, model))
-                Potential("likelihood", mout[-1](theta))
-
-                step = MLDA(
-                    coarse_models=coarse_models,
-                    base_sampler=stepper,
-                    subsampling_rates=nsub,
-                    variance_reduction=True,
-                    store_Q_fine=True,
-                )
-
-                trace = sample(
-                    draws=ndraws,
-                    step=step,
-                    chains=nchains,
-                    tune=ntune,
-                    cores=1,
-                    discard_tuned_samples=True,
-                    random_seed=seed,
-                    return_inferencedata=False,
-                )
-
-                # get fine level stats (standard method)
-                Q_2 = trace.get_sampler_stats("Q_2").reshape((nchains, ndraws))
-                Q_mean_standard = Q_2.mean(axis=1).mean()
-                Q_se_standard = np.sqrt(Q_2.var() / az.ess(np.array(Q_2, np.float64)))
-
-                # get VR stats
-                Q_mean_vr, Q_se_vr = extract_Q_estimate(trace, 3)
-
-                # check that returned values are floats and finite.
-                assert isinstance(Q_mean_standard, np.floating)
-                assert np.isfinite(Q_mean_standard)
-                assert isinstance(Q_mean_vr, np.floating)
-                assert np.isfinite(Q_mean_vr)
-                assert isinstance(Q_se_standard, np.floating)
-                assert np.isfinite(Q_se_standard)
-                assert isinstance(Q_se_vr, np.floating)
-                assert np.isfinite(Q_se_vr)
-
-                # check consistency of QoI across levels.
-                Q_1_0 = np.concatenate(trace.get_sampler_stats("Q_1_0")).reshape(
-                    (nchains, ndraws * nsub)
-                )
-                Q_2_1 = np.concatenate(trace.get_sampler_stats("Q_2_1")).reshape((nchains, ndraws))
-                # This used to be a strict zero equality!
-                assert np.isclose(Q_1_0.mean(axis=1), 0.0, atol=1e-4)
-                assert np.isclose(Q_2_1.mean(axis=1), 0.0, atol=1e-4)
-
-
 class TestRVsAssignmentSteps:
     """
     Test that step methods convert input RVs to respective value vars
@@ -1378,7 +661,6 @@ class TestRVsAssignmentSteps:
             (Slice, {}),
             (DEMetropolis, {}),
             (DEMetropolisZ, {}),
-            # (MLDA, {}),  # TODO
         ],
     )
     def test_continuous_steps(self, step, step_kwargs):
diff --git a/pymc/tests/test_types.py b/pymc/tests/test_types.py
index b08f0d26be..f5e884abfa 100644
--- a/pymc/tests/test_types.py
+++ b/pymc/tests/test_types.py
@@ -20,7 +20,7 @@
 from pymc.distributions import Normal
 from pymc.model import Model
 from pymc.sampling import sample
-from pymc.step_methods import MLDA, NUTS, HamiltonianMC, Metropolis, Slice
+from pymc.step_methods import NUTS, HamiltonianMC, Metropolis, Slice
 
 
 class TestType:
@@ -59,39 +59,3 @@ def test_float32(self):
         for sampler in self.samplers:
             with model:
                 sample(draws=10, tune=10, chains=1, step=sampler())
-
-    @aesara.config.change_flags({"floatX": "float64", "warn_float64": "ignore"})
-    def test_float64_MLDA(self):
-        data = np.random.randn(5)
-
-        with Model() as coarse_model:
-            x = Normal("x", initval=np.array(1.0, dtype="float64"))
-            obs = Normal("obs", mu=x, sigma=1.0, observed=data + 0.5)
-
-        with Model() as model:
-            x = Normal("x", initval=np.array(1.0, dtype="float64"))
-            obs = Normal("obs", mu=x, sigma=1.0, observed=data)
-
-        assert x.dtype == "float64"
-        assert obs.dtype == "float64"
-
-        with model:
-            sample(draws=10, tune=10, chains=1, step=MLDA(coarse_models=[coarse_model]))
-
-    @aesara.config.change_flags({"floatX": "float32", "warn_float64": "warn"})
-    def test_float32_MLDA(self):
-        data = np.random.randn(5).astype("float32")
-
-        with Model() as coarse_model:
-            x = Normal("x", initval=np.array(1.0, dtype="float32"))
-            obs = Normal("obs", mu=x, sigma=1.0, observed=data + 0.5)
-
-        with Model() as model:
-            x = Normal("x", initval=np.array(1.0, dtype="float32"))
-            obs = Normal("obs", mu=x, sigma=1.0, observed=data)
-
-        assert x.dtype == "float32"
-        assert obs.dtype == "float32"
-
-        with model:
-            sample(draws=10, tune=10, chains=1, step=MLDA(coarse_models=[coarse_model]))
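
Note on the deleted variance-reduction test: it compared the standard estimate from the fine-level stat Q_2 against the multilevel estimate that extract_Q_estimate built from the level-difference stats Q_1_0 and Q_2_1. The sketch below illustrates the telescoping idea those stats encode; it is a minimal illustration assuming independent level terms and synthetic draws, and the helper name telescoping_estimate is hypothetical, not the removed PyMC implementation (which, among other differences, computed its errors from the trace).

import numpy as np

def telescoping_estimate(q_coarsest, q_diff_1_0, q_diff_2_1):
    # E[Q_fine] = E[Q_0] + E[Q_1 - Q_0] + E[Q_2 - Q_1]
    terms = [np.asarray(t, dtype=float) for t in (q_coarsest, q_diff_1_0, q_diff_2_1)]
    q_mean = sum(t.mean() for t in terms)
    # naive standard error, treating the level terms as independent (assumption)
    q_se = np.sqrt(sum(t.var(ddof=1) / t.size for t in terms))
    return q_mean, q_se

# Synthetic draws: the difference terms concentrate near zero with small spread,
# which is the property the test's atol=1e-4 consistency asserts relied on.
rng = np.random.default_rng(12345)
q_mean, q_se = telescoping_estimate(
    rng.normal(2.0, 0.5, size=600),   # coarsest-level Q draws
    rng.normal(0.0, 0.05, size=600),  # level 1 minus level 0
    rng.normal(0.0, 0.01, size=200),  # level 2 minus level 1
)
print(q_mean, q_se)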