From 8ca3ded14caaf5ea6e829200bf4b21d0928b683d Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 30 Nov 2021 17:23:59 +0100
Subject: [PATCH 01/51] Replace print statement by AttributeError

---
 pymc/__init__.py         |  1 +
 pymc/find_optim_prior.py | 96 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 pymc/find_optim_prior.py

diff --git a/pymc/__init__.py b/pymc/__init__.py
index 6592d83e35..2923d61797 100644
--- a/pymc/__init__.py
+++ b/pymc/__init__.py
@@ -84,6 +84,7 @@ def __set_compiler_flags():
 from pymc.distributions import *
 from pymc.distributions import transforms
 from pymc.exceptions import *
+from pymc.find_optim_prior import find_optim_prior
 from pymc.math import (
     expand_packed_triangular,
     invlogit,
diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
new file mode 100644
index 0000000000..fe6aa1f7a7
--- /dev/null
+++ b/pymc/find_optim_prior.py
@@ -0,0 +1,96 @@
+# Copyright 2021 The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+import aesara
+import aesara.tensor as aet
+import numpy as np
+
+from scipy import optimize
+
+import pymc as pm
+
+__all__ = ["find_optim_prior"]
+
+
+def find_optim_prior(
+    pm_dist: pm.Distribution,
+    lower: float,
+    upper: float,
+    init_guess: List[float],
+    mass: float = 0.95,
+) -> np.ndarray:
+    """
+    Find optimal parameters to get `mass` % of probability
+    of `pm_dist` between `lower` and `upper`.
+    Note: only works for two-parameter distributions, as there
+    are exactly two constraints.
+
+    Parameters
+    ----------
+    pm_dist : pm.Distribution
+        PyMC distribution you want to set a prior on.
+        Needs to have a ``logcdf`` method implemented in PyMC.
+    lower : float
+        Lower bound to get `mass` % of probability of `pm_dist`.
+    upper : float
+        Upper bound to get `mass` % of probability of `pm_dist`.
+    init_guess: List[float]
+        Initial guess for ``scipy.optimize.least_squares`` to find the
+        optimal parameters of `pm_dist` fitting the interval constraint.
+    mass: float, default to 0.95
+        Share of the probability mass we want between ``lower`` and ``upper``.
+        Defaults to 95%.
+
+    Returns
+    -------
+    The optimized distribution parameters.
+    """
+    if len(pm_dist.rv_op.ndims_params) != 2:
+        raise NotImplementedError(
+            "This function only works only works for two-parameter distributions, as there are "
+            "exactly two constraints."
+        )
+
+    dist_params = aet.vector("dist_params")
+    lower_, upper_ = aet.scalars("lower", "upper")
+
+    dist_ = pm_dist.dist(*[dist_params[i] for i in range(len(init_guess))])
+    try:
+        logcdf_lower = pm.logcdf(dist_, lower_)
+        logcdf_upper = pm.logcdf(dist_, upper_)
+    except AttributeError:
+        raise AttributeError(
+            f"You cannot use `find_optim_params` with {pm_dist} -- it doesn't have a logcdf "
+            f"method yet. Open an issue or, even better, a PR on PyMC repo if you really need it."
+        )
+
+    alpha = 1 - mass
+    out = [logcdf_lower - np.log(alpha / 2), logcdf_upper - np.log(1 - alpha / 2)]
+    logcdf = aesara.function([dist_params, lower_, upper_], out)
+
+    try:
+        symb_grad = aet.as_tensor_variable([pm.gradient(o, [dist_params]) for o in out])
+        jac = aesara.function([dist_params, lower_, upper_], symb_grad)
+
+    # when PyMC cannot compute the gradient
+    except Exception:
+        jac = "2-point"
+
+    opt = optimize.least_squares(logcdf, init_guess, jac=jac, args=(lower, upper))
+    if not opt.success:
+        raise ValueError("Optimization of parameters failed.")
+
+    return opt.x

From 9dc0096f34aa332fd3a9d6bf4cdb81bb948e8a8c Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 30 Nov 2021 17:40:12 +0100
Subject: [PATCH 02/51] pre-commit formatting

---
 pymc/find_optim_prior.py |  6 +++---
 pymc/tests/test_util.py  | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index fe6aa1f7a7..365cc2a5d1 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -56,12 +56,12 @@ def find_optim_prior(

     Returns
     -------
-    The optimized distribution parameters.
+    The optimized distribution parameters as a numpy array.
     """
     if len(pm_dist.rv_op.ndims_params) != 2:
         raise NotImplementedError(
-            "This function only works only works for two-parameter distributions, as there are "
-            "exactly two constraints."
+            "This function only works for two-parameter distributions, as there are exactly two "
+            "constraints. "
         )

     dist_params = aet.vector("dist_params")
diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py
index c0500d3b97..724421ef39 100644
--- a/pymc/tests/test_util.py
+++ b/pymc/tests/test_util.py
@@ -142,3 +142,17 @@ def fn(a=UNSET):
     help(fn)
     captured = capsys.readouterr()
     assert "a=UNSET" in captured.out
+
+
+def test_find_optim_prior():
+    MASS = 0.95
+    opt_params = pm.find_optim_prior(pm.Gamma, lower=0.1, upper=0.4, mass=MASS, init_guess=[1, 10])
+    np.testing.assert_allclose(opt_params, np.array([8.47481597, 37.65435601]))
+
+    opt_params = pm.find_optim_prior(
+        pm.Normal, lower=155, upper=180, mass=MASS, init_guess=[170, 3]
+    )
+    np.testing.assert_allclose(opt_params, np.array([167.5000001, 6.37766828]))
+
+    with pytest.raises(NotImplementedError, match="only works for two-parameter distributions"):
+        pm.find_optim_prior(pm.Exponential, lower=0.1, upper=0.4, mass=MASS, init_guess=[1])

From 9675e4f1104e33ff0eca65b8fbc8a702cf682c5c Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 30 Nov 2021 17:54:44 +0100
Subject: [PATCH 03/51] Mention in release notes

---
 RELEASE-NOTES.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index f3035fe825..e17c9f9de8 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -97,7 +97,8 @@ This includes API changes we did not warn about since at least `3.11.0` (2021-01
 - Added partial dependence plots and individual conditional expectation plots [5091](https://github.com/pymc-devs/pymc3/pull/5091).
 - Modify how particle weights are computed. This improves accuracy of the modeled function (see [5177](https://github.com/pymc-devs/pymc3/pull/5177)).
 - `pm.Data` now passes additional kwargs to `aesara.shared`. [#5098](https://github.com/pymc-devs/pymc/pull/5098)
-- ...
+- The new `pm.find_optim_prior` function can be used to find the optimal prior parameters of a distribution under some
+  constraints (e.g. lower and upper bound). See [#5231](https://github.com/pymc-devs/pymc/pull/5231).

 ### Internal changes

From d13236460bb2760809de8ca53f8d2517343d37dd Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Wed, 1 Dec 2021 16:52:38 +0100
Subject: [PATCH 04/51] Handle 1-param and 3-param distributions

---
 pymc/find_optim_prior.py | 64 +++++++++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index 365cc2a5d1..520c19c8f0 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import List
+from typing import Dict, Optional

 import aesara
 import aesara.tensor as aet
 import numpy as np
@@ -29,14 +29,16 @@ def find_optim_prior(
     pm_dist: pm.Distribution,
     lower: float,
     upper: float,
-    init_guess: List[float],
+    init_guess: Dict[str, float],
     mass: float = 0.95,
-) -> np.ndarray:
+    fixed_params: Optional[Dict[str, float]] = None,
+) -> Dict[str, float]:
     """
     Find optimal parameters to get `mass` % of probability
     of `pm_dist` between `lower` and `upper`.
-    Note: only works for two-parameter distributions, as there
-    are exactly two constraints.
+    Note: only works for one- and two-parameter distributions, as there
+    are exactly two constraints. Fix some combination of parameters
+    if you want to use it on >=3-parameter distributions.

     Parameters
     ----------
@@ -47,27 +49,50 @@ def find_optim_prior(
         Lower bound to get `mass` % of probability of `pm_dist`.
     upper : float
         Upper bound to get `mass` % of probability of `pm_dist`.
-    init_guess: List[float]
+    init_guess: Dict[str, float]
         Initial guess for ``scipy.optimize.least_squares`` to find the
         optimal parameters of `pm_dist` fitting the interval constraint.
+        Must be a dictionary with the name of the PyMC distribution's
+        parameter as keys and the initial guess as values.
     mass: float, default to 0.95
         Share of the probability mass we want between ``lower`` and ``upper``.
         Defaults to 95%.
+    fixed_params: Dict[str, float], Optional, default None
+        Only used when `pm_dist` has at least three parameters.
+        Dictionary of fixed parameters, so that there are only 2 to optimize.
+        For instance, for a StudentT, you fix nu to a constant and get the optimized
+        mu and sigma.

     Returns
     -------
-    The optimized distribution parameters as a numpy array.
+    The optimized distribution parameters as a dictionary with the parameters'
+    name as key and the optimized value as value.
     """
-    if len(pm_dist.rv_op.ndims_params) != 2:
-        raise NotImplementedError(
-            "This function only works for two-parameter distributions, as there are exactly two "
-            "constraints. "
+    if len(init_guess) > 2:
+        if (fixed_params is None) or (len(fixed_params) < (len(pm_dist.rv_op.ndims_params) - 2)):
+            raise NotImplementedError(
+                "This function can only optimize two parameters. "
+                f"{pm_dist} has {len(pm_dist.rv_op.ndims_params)} parameters. "
+                f"You need to fix {len(pm_dist.rv_op.ndims_params) - 2} parameters in the "
+                "`fixed_params` dictionary."
+            )
+    elif (len(init_guess) < 2) and (len(init_guess) < len(pm_dist.rv_op.ndims_params)):
+        raise ValueError(
+            f"{pm_dist} has {len(pm_dist.rv_op.ndims_params)} parameters, but you provided only "
+            f"{len(init_guess)} initial guess. You need to provide 2."
         )

     dist_params = aet.vector("dist_params")
+    params_to_optim = {
+        arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess)))
+    }
     lower_, upper_ = aet.scalars("lower", "upper")

-    dist_ = pm_dist.dist(*[dist_params[i] for i in range(len(init_guess))])
+    if fixed_params is not None:
+        dist_ = pm_dist.dist(**(params_to_optim | fixed_params))
+    else:
+        dist_ = pm_dist.dist(**params_to_optim)
+
     try:
         logcdf_lower = pm.logcdf(dist_, lower_)
         logcdf_upper = pm.logcdf(dist_, upper_)
@@ -89,8 +114,19 @@ def find_optim_prior(
     except Exception:
         jac = "2-point"

-    opt = optimize.least_squares(logcdf, init_guess, jac=jac, args=(lower, upper))
+    opt = optimize.least_squares(logcdf, x0=list(init_guess.values()), jac=jac, args=(lower, upper))
     if not opt.success:
         raise ValueError("Optimization of parameters failed.")

-    return opt.x
+    if fixed_params is not None:
+        return {
+            param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
+        } | fixed_params
+    else:
+        return {
+            param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
+        }
+
+
+# check that Scipy's parametrization is used
+# handle 1-param case

From 6f9ccd42e0b7b2ace152d7b44a4609ba9dc3250b Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Wed, 1 Dec 2021 16:54:26 +0100
Subject: [PATCH 05/51] Update tests

---
 pymc/tests/test_util.py | 55 ++++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py
index 724421ef39..664face7c7 100644
--- a/pymc/tests/test_util.py
+++ b/pymc/tests/test_util.py
@@ -146,13 +146,56 @@ def fn(a=UNSET):

 def test_find_optim_prior():
     MASS = 0.95
-    opt_params = pm.find_optim_prior(pm.Gamma, lower=0.1, upper=0.4, mass=MASS, init_guess=[1, 10])
-    np.testing.assert_allclose(opt_params, np.array([8.47481597, 37.65435601]))

+    # normal case
     opt_params = pm.find_optim_prior(
-        pm.Normal, lower=155, upper=180, mass=MASS, init_guess=[170, 3]
+        pm.Gamma, lower=0.1, upper=0.4, mass=MASS, init_guess={"alpha": 1, "beta": 10}
     )
-    np.testing.assert_allclose(opt_params, np.array([167.5000001, 6.37766828]))
+    np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([8.47481597, 37.65435601]))

-    with pytest.raises(NotImplementedError, match="only works for two-parameter distributions"):
-        pm.find_optim_prior(pm.Exponential, lower=0.1, upper=0.4, mass=MASS, init_guess=[1])
+    # normal case, other distribution
+    opt_params = pm.find_optim_prior(
+        pm.Normal, lower=155, upper=180, mass=MASS, init_guess={"mu": 170, "sigma": 3}
+    )
+    np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([167.5000001, 6.37766828]))
+
+    # 1-param case
+    opt_params = pm.find_optim_prior(
+        pm.Exponential, lower=0.1, upper=0.4, mass=MASS, init_guess={"lam": 10}
+    )
+    np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([0.79929324]))
+
+    # 3-param case
+    opt_params = pm.find_optim_prior(
+        pm.StudentT,
+        lower=0.1,
+        upper=0.4,
+        mass=MASS,
+        init_guess={"mu": 170, "sigma": 3},
+        fixed_params={"nu": 7},
+    )
+    np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([0.25, 0.06343503]))
+
+    with pytest.raises(ValueError, match="parameters, but you provided only"):
+        pm.find_optim_prior(
+            pm.Gamma,
+            lower=0.1,
+            upper=0.4,
+            mass=MASS,
+            init_guess={"alpha": 1},
+            fixed_params={"beta": 10},
+        )
+
+    with pytest.raises(TypeError, match="required positional argument"):
+        pm.find_optim_prior(
+            pm.StudentT, lower=0.1, upper=0.4, mass=MASS, init_guess={"mu": 170, "sigma": 3}
+        )
+
+    with pytest.raises(NotImplementedError, match="This function can only optimize two parameters"):
+        pm.find_optim_prior(
+            pm.StudentT,
+            lower=0.1,
+            upper=0.4,
+            mass=MASS,
+            init_guess={"mu": 170, "sigma": 3, "nu": 7},
+        )

From fea664301c68219729e3827da8c0df501e76a51d Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Wed, 1 Dec 2021 17:47:51 +0100
Subject: [PATCH 06/51] Fix some wording

---
 pymc/find_optim_prior.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index 520c19c8f0..4ca5e3c0cd 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -98,8 +98,9 @@ def find_optim_prior(
         logcdf_upper = pm.logcdf(dist_, upper_)
     except AttributeError:
         raise AttributeError(
-            f"You cannot use `find_optim_params` with {pm_dist} -- it doesn't have a logcdf "
-            f"method yet. Open an issue or, even better, a PR on PyMC repo if you really need it."
+            f"You cannot use `find_optim_prior` with {pm_dist} -- it doesn't have a logcdf "
+            "method yet. Open an issue or, even better, a pull request on PyMC repo if you really "
+            "need it."
         )

     alpha = 1 - mass
     out = [logcdf_lower - np.log(alpha / 2), logcdf_upper - np.log(1 - alpha / 2)]
@@ -126,7 +127,3 @@ def find_optim_prior(
     return {
         param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
     }
-
-
-# check that Scipy's parametrization is used
-# handle 1-param case

From 524a900a604c5b5109683568d1f75c98309e551d Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Fri, 3 Dec 2021 13:32:20 +0100
Subject: [PATCH 07/51] pre-commit formatting

---
 pymc/find_optim_prior.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index 4ca5e3c0cd..8a99c2f0a1 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import warnings

 from typing import Dict, Optional

@@ -70,17 +71,13 @@ def find_optim_prior(
     """
     if len(init_guess) > 2:
         if (fixed_params is None) or (len(fixed_params) < (len(pm_dist.rv_op.ndims_params) - 2)):
-            raise NotImplementedError(
-                "This function can only optimize two parameters. "
-                f"{pm_dist} has {len(pm_dist.rv_op.ndims_params)} parameters. "
-                f"You need to fix {len(pm_dist.rv_op.ndims_params) - 2} parameters in the "
-                "`fixed_params` dictionary."
+            warnings.warn(
+                f"It seems like {pm_dist} has {len(pm_dist.rv_op.ndims_params)} parameters. "
+                "Note that `pm.find_optim_prior` can only optimize two parameters, so there may "
+                "not be a unique solution in your use-case. "
+                f"Consider fixing {len(pm_dist.rv_op.ndims_params) - 2} parameters in the "
+                f"`fixed_params` dictionary. "
             )
-    elif (len(init_guess) < 2) and (len(init_guess) < len(pm_dist.rv_op.ndims_params)):
-        raise ValueError(
-            f"{pm_dist} has {len(pm_dist.rv_op.ndims_params)} parameters, but you provided only "
-            f"{len(init_guess)} initial guess. You need to provide 2."
-        )

     dist_params = aet.vector("dist_params")
     params_to_optim = {
@@ -103,8 +100,7 @@ def find_optim_prior(
             "need it."
         )

-    alpha = 1 - mass
-    out = [logcdf_lower - np.log(alpha / 2), logcdf_upper - np.log(1 - alpha / 2)]
+    out = pm.math.logdiffexp(logcdf_upper, logcdf_lower) - np.log(mass)
     logcdf = aesara.function([dist_params, lower_, upper_], out)

     try:
@@ -127,3 +123,7 @@ def find_optim_prior(
     return {
         param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
     }
+
+
+# tests
+# add warning when mass is lower than expected -- try another initial guess?

From 29741f1d1af546a6d213cf473b45769f31080483 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Fri, 3 Dec 2021 16:38:46 +0100
Subject: [PATCH 08/51] Only raise UserWarning when mass_in_interval not
 optimal

---
 pymc/find_optim_prior.py | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index 8a99c2f0a1..a1e81c2865 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -69,15 +69,9 @@ def find_optim_prior(
     The optimized distribution parameters as a dictionary with the parameters'
     name as key and the optimized value as value.
     """
-    if len(init_guess) > 2:
-        if (fixed_params is None) or (len(fixed_params) < (len(pm_dist.rv_op.ndims_params) - 2)):
-            warnings.warn(
-                f"It seems like {pm_dist} has {len(pm_dist.rv_op.ndims_params)} parameters. "
-                "Note that `pm.find_optim_prior` can only optimize two parameters, so there may "
-                "not be a unique solution in your use-case. "
-                f"Consider fixing {len(pm_dist.rv_op.ndims_params) - 2} parameters in the "
-                f"`fixed_params` dictionary. "
-            )
+    # exit when any parameter is not scalar:
+    if np.any(np.asarray(pm_dist.rv_op.ndims_params) != 0):
+        raise NotImplementedError

     dist_params = aet.vector("dist_params")
     params_to_optim = {
@@ -116,14 +110,24 @@ def find_optim_prior(
         raise ValueError("Optimization of parameters failed.")

     if fixed_params is not None:
-        return {
+        opt_params = {
             param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
         } | fixed_params
     else:
-        return {
+        opt_params = {
             param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
         }

+    opt_dist = pm_dist.dist(**opt_params)
+    mass_in_interval = (
+        pm.math.exp(pm.logcdf(opt_dist, upper)) - pm.math.exp(pm.logcdf(opt_dist, lower))
+    ).eval()
+    if (np.abs(mass_in_interval - mass)) >= 0.01:
+        warnings.warn(
+            f"Final optimization has {mass_in_interval * 100:.0f}% of probability mass between "
+            f"{lower} and {upper} instead of the requested {mass * 100:.0f}%."
+            "You may need to use a more flexible distribution, change the fixed paramaters in the "
+            "`fixed_params` dictionary, or provide better initial guesses."
+        )

-# tests
-# add warning when mass is lower than expected -- try another initial guess?
+    return opt_params

From 774257173d7dad4358dc0b21688459624ed7dcb7 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Fri, 3 Dec 2021 16:45:29 +0100
Subject: [PATCH 09/51] Raise NotImplementedError for non-scalar params

---
 pymc/find_optim_prior.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index a1e81c2865..8bc6ce4228 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -71,7 +71,10 @@ def find_optim_prior(
     """
     # exit when any parameter is not scalar:
     if np.any(np.asarray(pm_dist.rv_op.ndims_params) != 0):
-        raise NotImplementedError
+        raise NotImplementedError(
+            "`pm.find_optim_prior` does not work with non-scalar parameters yet.\n"
+            "Feel free to open a pull request on PyMC repo if you really need this feature."
+        )

     dist_params = aet.vector("dist_params")
     params_to_optim = {
@@ -90,7 +93,7 @@ def find_optim_prior(
     except AttributeError:
         raise AttributeError(
             f"You cannot use `find_optim_prior` with {pm_dist} -- it doesn't have a logcdf "
-            "method yet. Open an issue or, even better, a pull request on PyMC repo if you really "
+            "method yet.\nOpen an issue or, even better, a pull request on PyMC repo if you really "
             "need it."
         )
@@ -125,8 +128,8 @@ def find_optim_prior(
     if (np.abs(mass_in_interval - mass)) >= 0.01:
         warnings.warn(
             f"Final optimization has {mass_in_interval * 100:.0f}% of probability mass between "
-            f"{lower} and {upper} instead of the requested {mass * 100:.0f}%."
-            "You may need to use a more flexible distribution, change the fixed paramaters in the "
+            f"{lower} and {upper} instead of the requested {mass * 100:.0f}%.\n"
+            "You may need to use a more flexible distribution, change the fixed parameters in the "
             "`fixed_params` dictionary, or provide better initial guesses."
         )

From 1ad42975eb90adce565f36a753daa60e69f6ae19 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Fri, 3 Dec 2021 17:03:45 +0100
Subject: [PATCH 10/51] Remove pipe operator for old python versions

---
 pymc/find_optim_prior.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index 8bc6ce4228..3dea6f47a9 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -112,15 +112,14 @@ def find_optim_prior(
     if not opt.success:
         raise ValueError("Optimization of parameters failed.")

+    # save optimal parameters
+    opt_params = {
+        param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
+    }
     if fixed_params is not None:
-        opt_params = {
-            param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
-        } | fixed_params
-    else:
-        opt_params = {
-            param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
-        }
+        opt_params.update(fixed_params)

+    # check mass in interval is not too far from `mass`
     opt_dist = pm_dist.dist(**opt_params)
     mass_in_interval = (
         pm.math.exp(pm.logcdf(opt_dist, upper)) - pm.math.exp(pm.logcdf(opt_dist, lower))

From a708e6d63138d63d0f2c4226e1cf3d8959d298f8 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Fri, 3 Dec 2021 17:04:31 +0100
Subject: [PATCH 11/51] Update tests

---
 pymc/tests/test_util.py | 61 +++++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 17 deletions(-)

diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py
index 664face7c7..691e61f2b6 100644
--- a/pymc/tests/test_util.py
+++ b/pymc/tests/test_util.py
@@ -147,25 +147,23 @@ def fn(a=UNSET):
 def test_find_optim_prior():
     MASS = 0.95

-    # normal case
+    # Gamma, normal case
     opt_params = pm.find_optim_prior(
         pm.Gamma, lower=0.1, upper=0.4, mass=MASS, init_guess={"alpha": 1, "beta": 10}
     )
-    np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([8.47481597, 37.65435601]))
+    np.testing.assert_allclose(
+        list(opt_params.values()), np.array([8.506023352404027, 37.59626616198404])
+    )

-    # normal case, other distribution
+    # Normal, normal case
     opt_params = pm.find_optim_prior(
         pm.Normal, lower=155, upper=180, mass=MASS, init_guess={"mu": 170, "sigma": 3}
     )
-    np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([167.5000001, 6.37766828]))
-
-    # 1-param case
-    opt_params = pm.find_optim_prior(
-        pm.Exponential, lower=0.1, upper=0.4, mass=MASS, init_guess={"lam": 10}
+    np.testing.assert_allclose(
+        list(opt_params.values()), np.array([170.76059047372624, 5.542895384602784])
     )
-    np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([0.79929324]))

-    # 3-param case
+    # Student, works as expected
     opt_params = pm.find_optim_prior(
         pm.StudentT,
         lower=0.1,
@@ -174,9 +172,36 @@ def test_find_optim_prior():
         init_guess={"mu": 170, "sigma": 3},
         fixed_params={"nu": 7},
     )
-    np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([0.25, 0.06343503]))
+    assert "nu" in opt_params
+    np.testing.assert_allclose(
+        list(opt_params.values()), np.array([0.24995405785756986, 0.06343501657095188, 7])
+    )

-    with pytest.raises(ValueError, match="parameters, but you provided only"):
+    # Student not deterministic but without warning
+    with pytest.warns(None) as record:
+        pm.find_optim_prior(
+            pm.StudentT,
+            lower=0,
+            upper=1,
+            mass=MASS,
+            init_guess={"mu": 5, "sigma": 2, "nu": 7},
+        )
+    assert len(record) == 0
+
+    # Exponential without warning
+    with pytest.warns(None) as record:
+        opt_params = pm.find_optim_prior(
+            pm.Exponential, lower=0, upper=1, mass=MASS, init_guess={"lam": 1}
+        )
+    assert len(record) == 0
+    np.testing.assert_allclose(list(opt_params.values()), np.array([2.9957322673241604]))
+
+    # Exponential too constraining
+    with pytest.warns(UserWarning, match="instead of the requested 95%"):
+        pm.find_optim_prior(pm.Exponential, lower=0.1, upper=1, mass=MASS, init_guess={"lam": 1})
+
+    # Gamma too constraining
+    with pytest.warns(UserWarning, match="instead of the requested 95%"):
         pm.find_optim_prior(
             pm.Gamma,
             lower=0.1,
@@ -186,16 +211,18 @@ def test_find_optim_prior():
             fixed_params={"beta": 10},
         )

+    # missing param
     with pytest.raises(TypeError, match="required positional argument"):
         pm.find_optim_prior(
             pm.StudentT, lower=0.1, upper=0.4, mass=MASS, init_guess={"mu": 170, "sigma": 3}
         )

-    with pytest.raises(NotImplementedError, match="This function can only optimize two parameters"):
+    # non-scalar params
+    with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"):
         pm.find_optim_prior(
-            pm.StudentT,
-            lower=0.1,
-            upper=0.4,
+            pm.MvNormal,
+            lower=0,
+            upper=1,
             mass=MASS,
-            init_guess={"mu": 170, "sigma": 3, "nu": 7},
+            init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])},
         )

From e1c5125c374ac098a3d9574d011cf6f2326068b7 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 7 Dec 2021 15:03:33 +0100
Subject: [PATCH 12/51] Add test with discrete distrib & wrap in
 pytest.warns(None)

---
 pymc/tests/test_util.py | 48 +++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py
index 691e61f2b6..d19eeea919 100644
--- a/pymc/tests/test_util.py
+++ b/pymc/tests/test_util.py
@@ -148,30 +148,36 @@ def test_find_optim_prior():
     MASS = 0.95

     # Gamma, normal case
-    opt_params = pm.find_optim_prior(
-        pm.Gamma, lower=0.1, upper=0.4, mass=MASS, init_guess={"alpha": 1, "beta": 10}
-    )
+    with pytest.warns(None) as record:
+        opt_params = pm.find_optim_prior(
+            pm.Gamma, lower=0.1, upper=0.4, mass=MASS, init_guess={"alpha": 1, "beta": 10}
+        )
+    assert len(record) == 0
     np.testing.assert_allclose(
         list(opt_params.values()), np.array([8.506023352404027, 37.59626616198404])
     )

     # Normal, normal case
-    opt_params = pm.find_optim_prior(
-        pm.Normal, lower=155, upper=180, mass=MASS, init_guess={"mu": 170, "sigma": 3}
-    )
+    with pytest.warns(None) as record:
+        opt_params = pm.find_optim_prior(
+            pm.Normal, lower=155, upper=180, mass=MASS, init_guess={"mu": 170, "sigma": 3}
+        )
+    assert len(record) == 0
     np.testing.assert_allclose(
         list(opt_params.values()), np.array([170.76059047372624, 5.542895384602784])
     )

     # Student, works as expected
-    opt_params = pm.find_optim_prior(
-        pm.StudentT,
-        lower=0.1,
-        upper=0.4,
-        mass=MASS,
-        init_guess={"mu": 170, "sigma": 3},
-        fixed_params={"nu": 7},
-    )
+    with pytest.warns(None) as record:
+        opt_params = pm.find_optim_prior(
+            pm.StudentT,
+            lower=0.1,
+            upper=0.4,
+            mass=MASS,
+            init_guess={"mu": 170, "sigma": 3},
+            fixed_params={"nu": 7},
+        )
+    assert len(record) == 0
     assert "nu" in opt_params
     np.testing.assert_allclose(
         list(opt_params.values()), np.array([0.24995405785756986, 0.06343501657095188, 7])
@@ -188,6 +194,14 @@ def test_find_optim_prior():
         )
     assert len(record) == 0

+    # Binomial, works as expected
+    with pytest.warns(None) as record:
+        opt_params = pm.find_optim_prior(
+            pm.Binomial, lower=2, upper=8, mass=MASS, init_guess={"p": 0.5}, fixed_params={"n": 10}
+        )
+    assert len(record) == 0
+    np.testing.assert_allclose(list(opt_params.values()), np.array([0.5575067955294625, 10]))
+
     # Exponential without warning
     with pytest.warns(None) as record:
         opt_params = pm.find_optim_prior(
@@ -211,6 +225,12 @@ def test_find_optim_prior():
             fixed_params={"beta": 10},
         )

+    # Binomial too constraining
+    with pytest.warns(UserWarning, match="instead of the requested 95%"):
+        pm.find_optim_prior(
+            pm.Binomial, lower=0, upper=2, mass=MASS, init_guess={"p": 0.8}, fixed_params={"n": 10}
+        )
+
     # missing param
     with pytest.raises(TypeError, match="required positional argument"):
         pm.find_optim_prior(

From bc9b543e1cdfcbc35650660538fb35baa401afb7 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 7 Dec 2021 15:34:41 +0100
Subject: [PATCH 13/51] Remove pipe operator for good

---
 pymc/find_optim_prior.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index 3dea6f47a9..fd90fe0a3e 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -83,9 +83,9 @@ def find_optim_prior(
     lower_, upper_ = aet.scalars("lower", "upper")

     if fixed_params is not None:
-        dist_ = pm_dist.dist(**(params_to_optim | fixed_params))
-    else:
-        dist_ = pm_dist.dist(**params_to_optim)
+        params_to_optim.update(fixed_params)
+
+    dist_ = pm_dist.dist(**params_to_optim)

     try:
         logcdf_lower = pm.logcdf(dist_, lower_)

From 18ad975f3da943ba21955478b712aadf19738338 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 7 Dec 2021 16:49:18 +0100
Subject: [PATCH 14/51] Fix TypeError in dist_params

---
 pymc/find_optim_prior.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index fd90fe0a3e..e4cfb8fffc 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -76,7 +76,7 @@ def find_optim_prior(
             "Feel free to open a pull request on PyMC repo if you really need this feature."
         )

-    dist_params = aet.vector("dist_params")
+    dist_params = aet.vector("dist_params", dtype=aesara.config.floatX)
     params_to_optim = {
         arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess)))
     }

From e92d6d87400dbffd092213255ab47ca64ea05c01 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 7 Dec 2021 16:49:45 +0100
Subject: [PATCH 15/51] Relax tolerance for tests

---
 pymc/tests/test_util.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py
index d19eeea919..d9e77603db 100644
--- a/pymc/tests/test_util.py
+++ b/pymc/tests/test_util.py
@@ -154,7 +154,7 @@ def test_find_optim_prior():
         )
     assert len(record) == 0
     np.testing.assert_allclose(
-        list(opt_params.values()), np.array([8.506023352404027, 37.59626616198404])
+        list(opt_params.values()), np.array([8.506023352404027, 37.59626616198404]), rtol=1e-03
     )

     # Normal, normal case
@@ -164,7 +164,7 @@ def test_find_optim_prior():
         )
     assert len(record) == 0
     np.testing.assert_allclose(
-        list(opt_params.values()), np.array([170.76059047372624, 5.542895384602784])
+        list(opt_params.values()), np.array([170.76059047372624, 5.542895384602784]), rtol=1e-03
     )

     # Student, works as expected
@@ -180,7 +180,9 @@ def test_find_optim_prior():
     assert len(record) == 0
     assert "nu" in opt_params
     np.testing.assert_allclose(
-        list(opt_params.values()), np.array([0.24995405785756986, 0.06343501657095188, 7])
+        list(opt_params.values()),
+        np.array([0.24995405785756986, 0.06343501657095188, 7]),
+        rtol=1e-03,
     )
@@ -200,7 +202,9 @@ def test_find_optim_prior():
         )
     assert len(record) == 0
-    np.testing.assert_allclose(list(opt_params.values()), np.array([0.5575067955294625, 10]))
+    np.testing.assert_allclose(
+        list(opt_params.values()), np.array([0.5575067955294625, 10]), rtol=1e-03
+    )

     # Exponential without warning
     with pytest.warns(None) as record:
@@ -208,7 +212,9 @@ def test_find_optim_prior():
             pm.Exponential, lower=0, upper=1, mass=MASS, init_guess={"lam": 1}
         )
     assert len(record) == 0
-    np.testing.assert_allclose(list(opt_params.values()), np.array([2.9957322673241604]))
+    np.testing.assert_allclose(
+        list(opt_params.values()), np.array([2.9957322673241604]), rtol=1e-03
+    )

From 94b406b733e9ea42f9cd9da87ddd4d807f12cab8 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 14 Dec 2021 17:34:27 +0100
Subject: [PATCH 16/51] Force float64 config in find_optim_prior

---
 pymc/find_optim_prior.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py
index e4cfb8fffc..1b45415c32 100644
--- a/pymc/find_optim_prior.py
+++ b/pymc/find_optim_prior.py
@@ -76,7 +76,9 @@ def find_optim_prior(
             "Feel free to open a pull request on PyMC repo if you really need this feature."
         )

-    dist_params = aet.vector("dist_params", dtype=aesara.config.floatX)
+    # force float64 config
+    aesara.config.floatX = "float64"
+    dist_params = aet.vector("dist_params")
     params_to_optim = {
         arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess)))
     }

From 76dbb1f180a70887cfd6841173ca8ca1c8deb577 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Tue, 14 Dec 2021 17:36:48 +0100
Subject: [PATCH 17/51] Rename file name to func_utils.py

---
 pymc/__init__.py                            | 2 +-
 pymc/{find_optim_prior.py => func_utils.py} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename pymc/{find_optim_prior.py => func_utils.py} (100%)

diff --git a/pymc/__init__.py b/pymc/__init__.py
index 2923d61797..2f60db9c67 100644
--- a/pymc/__init__.py
+++ b/pymc/__init__.py
@@ -84,7 +84,7 @@ def __set_compiler_flags():
 from pymc.distributions import *
 from pymc.distributions import transforms
 from pymc.exceptions import *
-from pymc.find_optim_prior import find_optim_prior
+from pymc.func_utils import find_optim_prior
 from pymc.math import (
     expand_packed_triangular,
     invlogit,
diff --git a/pymc/find_optim_prior.py b/pymc/func_utils.py
similarity index 100%
rename from pymc/find_optim_prior.py
rename to pymc/func_utils.py
) - dist_params = aet.vector("dist_params") + dist_params = aet.vector("dist_params", dtype=aesara.config.floatX) params_to_optim = { arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess))) } From a6c7f0db195c7fc741a6a64d52d19d49fc6a2304 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Tue, 7 Dec 2021 16:49:45 +0100 Subject: [PATCH 32/51] Relax tolerance for tests --- pymc/tests/test_util.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index d19eeea919..d9e77603db 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -154,7 +154,7 @@ def test_find_optim_prior(): ) assert len(record) == 0 np.testing.assert_allclose( - list(opt_params.values()), np.array([8.506023352404027, 37.59626616198404]) + list(opt_params.values()), np.array([8.506023352404027, 37.59626616198404]), rtol=1e-03 ) # Normal, normal case @@ -164,7 +164,7 @@ def test_find_optim_prior(): ) assert len(record) == 0 np.testing.assert_allclose( - list(opt_params.values()), np.array([170.76059047372624, 5.542895384602784]) + list(opt_params.values()), np.array([170.76059047372624, 5.542895384602784]), rtol=1e-03 ) # Student, works as expected @@ -180,7 +180,9 @@ def test_find_optim_prior(): assert len(record) == 0 assert "nu" in opt_params np.testing.assert_allclose( - list(opt_params.values()), np.array([0.24995405785756986, 0.06343501657095188, 7]) + list(opt_params.values()), + np.array([0.24995405785756986, 0.06343501657095188, 7]), + rtol=1e-03, ) # Student not deterministic but without warning @@ -200,7 +202,9 @@ def test_find_optim_prior(): pm.Binomial, lower=2, upper=8, mass=MASS, init_guess={"p": 0.5}, fixed_params={"n": 10} ) assert len(record) == 0 - np.testing.assert_allclose(list(opt_params.values()), np.array([0.5575067955294625, 10])) + np.testing.assert_allclose( + list(opt_params.values()), np.array([0.5575067955294625, 10]), rtol=1e-03 + ) # Exponential without warning with pytest.warns(None) as record: @@ -208,7 +212,9 @@ def test_find_optim_prior(): pm.Exponential, lower=0, upper=1, mass=MASS, init_guess={"lam": 1} ) assert len(record) == 0 - np.testing.assert_allclose(list(opt_params.values()), np.array([2.9957322673241604])) + np.testing.assert_allclose( + list(opt_params.values()), np.array([2.9957322673241604]), rtol=1e-03 + ) # Exponential too constraining with pytest.warns(UserWarning, match="instead of the requested 95%"): From c9c24d6e5d7a42c61dae1b3a5efcb63843f32b5c Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Tue, 14 Dec 2021 17:34:27 +0100 Subject: [PATCH 33/51] Force float64 config in find_optim_prior --- pymc/find_optim_prior.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymc/find_optim_prior.py b/pymc/find_optim_prior.py index e4cfb8fffc..1b45415c32 100644 --- a/pymc/find_optim_prior.py +++ b/pymc/find_optim_prior.py @@ -76,7 +76,9 @@ def find_optim_prior( "Feel free to open a pull request on PyMC repo if you really need this feature." 
) - dist_params = aet.vector("dist_params", dtype=aesara.config.floatX) + # force float64 config + aesara.config.floatX = "float64" + dist_params = aet.vector("dist_params") params_to_optim = { arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess))) } From c75f8c9db5b674f15e3fd3f7a09a028c3242e6e7 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Tue, 14 Dec 2021 17:36:48 +0100 Subject: [PATCH 34/51] Rename file name to func_utils.py --- pymc/__init__.py | 2 +- pymc/{find_optim_prior.py => func_utils.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename pymc/{find_optim_prior.py => func_utils.py} (100%) diff --git a/pymc/__init__.py b/pymc/__init__.py index f39cb6b590..3de1cae9db 100644 --- a/pymc/__init__.py +++ b/pymc/__init__.py @@ -81,7 +81,7 @@ def __set_compiler_flags(): from pymc.distributions import * from pymc.distributions import transforms from pymc.exceptions import * -from pymc.find_optim_prior import find_optim_prior +from pymc.func_utils import find_optim_prior from pymc.math import ( expand_packed_triangular, invlogit, diff --git a/pymc/find_optim_prior.py b/pymc/func_utils.py similarity index 100% rename from pymc/find_optim_prior.py rename to pymc/func_utils.py From 3ffd7ff29da0ee21de96ee843bab53612714c916 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 16 Dec 2021 19:29:56 +0100 Subject: [PATCH 35/51] Change optimization error function and refactor tests --- pymc/func_utils.py | 34 +++++----- pymc/tests/test_util.py | 136 +++++++++++++++------------------------- 2 files changed, 68 insertions(+), 102 deletions(-) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 1b45415c32..cd998a17aa 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -27,7 +27,7 @@ def find_optim_prior( - pm_dist: pm.Distribution, + distribution: pm.Distribution, lower: float, upper: float, init_guess: Dict[str, float], @@ -43,7 +43,7 @@ def find_optim_prior( Parameters ---------- - pm_dist : pm.Distribution + distribution : pm.Distribution PyMC distribution you want to set a prior on. Needs to have a ``logcdf`` method implemented in PyMC. lower : float @@ -69,48 +69,48 @@ def find_optim_prior( The optimized distribution parameters as a dictionary with the parameters' name as key and the optimized value as value. """ + # TODO: Add user friendly ValueError for this check + assert 0.01 < mass < 0.99 + # exit when any parameter is not scalar: - if np.any(np.asarray(pm_dist.rv_op.ndims_params) != 0): + if np.any(np.asarray(distribution.rv_op.ndims_params) != 0): raise NotImplementedError( "`pm.find_optim_prior` does not work with non-scalar parameters yet.\n" "Feel free to open a pull request on PyMC repo if you really need this feature." 
) - # force float64 config - aesara.config.floatX = "float64" dist_params = aet.vector("dist_params") params_to_optim = { arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess))) } - lower_, upper_ = aet.scalars("lower", "upper") if fixed_params is not None: params_to_optim.update(fixed_params) - dist_ = pm_dist.dist(**params_to_optim) + dist = distribution.dist(**params_to_optim) try: - logcdf_lower = pm.logcdf(dist_, lower_) - logcdf_upper = pm.logcdf(dist_, upper_) + logcdf_lower = pm.logcdf(dist, pm.floatX(lower)) + logcdf_upper = pm.logcdf(dist, pm.floatX(upper)) except AttributeError: raise AttributeError( - f"You cannot use `find_optim_prior` with {pm_dist} -- it doesn't have a logcdf " + f"You cannot use `find_optim_prior` with {distribution} -- it doesn't have a logcdf " "method yet.\nOpen an issue or, even better, a pull request on PyMC repo if you really " "need it." ) - out = pm.math.logdiffexp(logcdf_upper, logcdf_lower) - np.log(mass) - logcdf = aesara.function([dist_params, lower_, upper_], out) + cdf_error = (pm.math.exp(logcdf_upper) - pm.math.exp(logcdf_lower)) - mass + cdf_error_fn = aesara.function([dist_params], cdf_error, allow_input_downcast=True) try: - symb_grad = aet.as_tensor_variable([pm.gradient(o, [dist_params]) for o in out]) - jac = aesara.function([dist_params, lower_, upper_], symb_grad) - + aesara_jac = pm.gradient(cdf_error, [dist_params]) + jac = aesara.function([dist_params], aesara_jac, allow_input_downcast=True) # when PyMC cannot compute the gradient + # TODO: use specific gradient not implemented exception except Exception: jac = "2-point" - opt = optimize.least_squares(logcdf, x0=list(init_guess.values()), jac=jac, args=(lower, upper)) + opt = optimize.least_squares(cdf_error_fn, x0=list(init_guess.values()), jac=jac) if not opt.success: raise ValueError("Optimization of parameters failed.") @@ -122,7 +122,7 @@ def find_optim_prior( opt_params.update(fixed_params) # check mass in interval is not too far from `mass` - opt_dist = pm_dist.dist(**opt_params) + opt_dist = distribution.dist(**opt_params) mass_in_interval = ( pm.math.exp(pm.logcdf(opt_dist, upper)) - pm.math.exp(pm.logcdf(opt_dist, lower)) ).eval() diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index d9e77603db..782d2e0603 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -144,103 +144,69 @@ def fn(a=UNSET): assert "a=UNSET" in captured.out -def test_find_optim_prior(): - MASS = 0.95 - - # Gamma, normal case - with pytest.warns(None) as record: - opt_params = pm.find_optim_prior( - pm.Gamma, lower=0.1, upper=0.4, mass=MASS, init_guess={"alpha": 1, "beta": 10} - ) - assert len(record) == 0 - np.testing.assert_allclose( - list(opt_params.values()), np.array([8.506023352404027, 37.59626616198404]), rtol=1e-03 - ) - - # Normal, normal case - with pytest.warns(None) as record: - opt_params = pm.find_optim_prior( - pm.Normal, lower=155, upper=180, mass=MASS, init_guess={"mu": 170, "sigma": 3} - ) - assert len(record) == 0 - np.testing.assert_allclose( - list(opt_params.values()), np.array([170.76059047372624, 5.542895384602784]), rtol=1e-03 - ) - - # Student, works as expected +@pytest.mark.parametrize( + "distribution, lower, upper, init_guess, fixed_params", + [ + (pm.Gamma, 0.1, 0.4, {"alpha": 1, "beta": 10}, {}), + (pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}), + (pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}), + (pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}), + # (pm.Exponential, 0, 1, {"lam": 
1}, {}), # Gradient is failing miserably, figure out why + (pm.HalfNormal, 0, 1, {"sigma": 1}, {}), + (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}), + (pm.Poisson, 0, 10, {"mu": 0.5}, {}), + ], +) +@pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) +def test_find_optim_prior(distribution, lower, upper, init_guess, fixed_params, mass): with pytest.warns(None) as record: opt_params = pm.find_optim_prior( - pm.StudentT, - lower=0.1, - upper=0.4, - mass=MASS, - init_guess={"mu": 170, "sigma": 3}, - fixed_params={"nu": 7}, - ) - assert len(record) == 0 - assert "nu" in opt_params - np.testing.assert_allclose( - list(opt_params.values()), - np.array([0.24995405785756986, 0.06343501657095188, 7]), - rtol=1e-03, - ) - - # Student not deterministic but without warning - with pytest.warns(None) as record: - pm.find_optim_prior( - pm.StudentT, - lower=0, - upper=1, - mass=MASS, - init_guess={"mu": 5, "sigma": 2, "nu": 7}, + distribution, + lower=lower, + upper=upper, + mass=mass, + init_guess=init_guess, + fixed_params=fixed_params, ) assert len(record) == 0 - # Binomial, works as expected - with pytest.warns(None) as record: - opt_params = pm.find_optim_prior( - pm.Binomial, lower=2, upper=8, mass=MASS, init_guess={"p": 0.5}, fixed_params={"n": 10} - ) - assert len(record) == 0 - np.testing.assert_allclose( - list(opt_params.values()), np.array([0.5575067955294625, 10]), rtol=1e-03 - ) - - # Exponential without warning - with pytest.warns(None) as record: - opt_params = pm.find_optim_prior( - pm.Exponential, lower=0, upper=1, mass=MASS, init_guess={"lam": 1} - ) - assert len(record) == 0 - np.testing.assert_allclose( - list(opt_params.values()), np.array([2.9957322673241604]), rtol=1e-03 - ) - - # Exponential too constraining - with pytest.warns(UserWarning, match="instead of the requested 95%"): - pm.find_optim_prior(pm.Exponential, lower=0.1, upper=1, mass=MASS, init_guess={"lam": 1}) - - # Gamma too constraining + opt_distribution = distribution.dist(**opt_params) + mass_in_interval = ( + pm.math.exp(pm.logcdf(opt_distribution, upper)) + - pm.math.exp(pm.logcdf(opt_distribution, lower)) + ).eval() + assert np.abs(mass_in_interval - mass) <= 1e-5 + + +@pytest.mark.parametrize( + "distribution, lower, upper, init_guess, fixed_params", + [ + (pm.Gamma, 0.1, 0.4, {"alpha": 1}, {"beta": 10}), + (pm.Exponential, 0.1, 1, {"lam": 1}, {}), + (pm.Binomial, 0, 2, {"p": 0.8}, {"n": 10}), + ], +) +def test_find_optim_prior_error_too_large(distribution, lower, upper, init_guess, fixed_params): with pytest.warns(UserWarning, match="instead of the requested 95%"): pm.find_optim_prior( - pm.Gamma, - lower=0.1, - upper=0.4, - mass=MASS, - init_guess={"alpha": 1}, - fixed_params={"beta": 10}, + distribution, + lower=lower, + upper=upper, + mass=0.95, + init_guess=init_guess, + fixed_params=fixed_params, ) - # Binomial too constraining - with pytest.warns(UserWarning, match="instead of the requested 95%"): - pm.find_optim_prior( - pm.Binomial, lower=0, upper=2, mass=MASS, init_guess={"p": 0.8}, fixed_params={"n": 10} - ) +def test_find_optim_prior_input_errors(): # missing param with pytest.raises(TypeError, match="required positional argument"): pm.find_optim_prior( - pm.StudentT, lower=0.1, upper=0.4, mass=MASS, init_guess={"mu": 170, "sigma": 3} + pm.StudentT, + lower=0.1, + upper=0.4, + mass=0.95, + init_guess={"mu": 170, "sigma": 3}, ) # non-scalar params @@ -249,6 +215,6 @@ def test_find_optim_prior(): pm.MvNormal, lower=0, upper=1, - mass=MASS, + mass=0.95, init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], 
[0.2, 1]])}, ) From a1a6bdf9f917d3de51e02ea4b442c89818611a15 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 21 Dec 2021 17:08:58 +0100 Subject: [PATCH 36/51] Use aesaraf.compile_pymc --- pymc/func_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index cd998a17aa..0585142bb8 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -15,7 +15,6 @@ from typing import Dict, Optional -import aesara import aesara.tensor as aet import numpy as np @@ -100,11 +99,11 @@ def find_optim_prior( ) cdf_error = (pm.math.exp(logcdf_upper) - pm.math.exp(logcdf_lower)) - mass - cdf_error_fn = aesara.function([dist_params], cdf_error, allow_input_downcast=True) + cdf_error_fn = pm.aesaraf.compile_pymc([dist_params], cdf_error, allow_input_downcast=True) try: aesara_jac = pm.gradient(cdf_error, [dist_params]) - jac = aesara.function([dist_params], aesara_jac, allow_input_downcast=True) + jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True) # when PyMC cannot compute the gradient # TODO: use specific gradient not implemented exception except Exception: From 1d868fae8bd82fb6e7d815166c1f9175fe95774e Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Wed, 22 Dec 2021 14:10:01 +0100 Subject: [PATCH 37/51] Add and test AssertionError for mass value --- pymc/func_utils.py | 8 +++++--- pymc/tests/test_util.py | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 0585142bb8..744ca11f8c 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -68,8 +68,10 @@ def find_optim_prior( The optimized distribution parameters as a dictionary with the parameters' name as key and the optimized value as value. """ - # TODO: Add user friendly ValueError for this check - assert 0.01 < mass < 0.99 + assert 0.01 <= mass <= 0.99, ( + "This function optimizes the mass of the given distribution +/- " + f"1%, so `mass` has to be between 0.01 and 0.99. You provided {mass}." 
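+        # NB: the "+/- 1%" wording matches the 0.01 tolerance applied in the
+        # mass_in_interval warning check at the end of this function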
+ ) # exit when any parameter is not scalar: if np.any(np.asarray(distribution.rv_op.ndims_params) != 0): @@ -125,7 +127,7 @@ def find_optim_prior( mass_in_interval = ( pm.math.exp(pm.logcdf(opt_dist, upper)) - pm.math.exp(pm.logcdf(opt_dist, lower)) ).eval() - if (np.abs(mass_in_interval - mass)) >= 0.01: + if (np.abs(mass_in_interval - mass)) > 0.01: warnings.warn( f"Final optimization has {mass_in_interval * 100:.0f}% of probability mass between " f"{lower} and {upper} instead of the requested {mass * 100:.0f}%.\n" diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index a9606287cc..246f523a46 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -209,6 +209,28 @@ def test_find_optim_prior_input_errors(): init_guess={"mu": 170, "sigma": 3}, ) + # mass too high + with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): + pm.find_optim_prior( + pm.StudentT, + lower=0.1, + upper=0.4, + mass=0.995, + init_guess={"mu": 170, "sigma": 3}, + fixed_params={"nu": 7}, + ) + + # mass too low + with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): + pm.find_optim_prior( + pm.StudentT, + lower=0.1, + upper=0.4, + mass=0.005, + init_guess={"mu": 170, "sigma": 3}, + fixed_params={"nu": 7}, + ) + # non-scalar params with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"): pm.find_optim_prior( From 063bc96a56e1ea5c9da218bd44b730c96610c4ba Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Thu, 23 Dec 2021 17:41:30 +0100 Subject: [PATCH 38/51] Fix type error in warning message --- pymc/func_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 744ca11f8c..50ff6d63f5 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -129,7 +129,7 @@ def find_optim_prior( ).eval() if (np.abs(mass_in_interval - mass)) > 0.01: warnings.warn( - f"Final optimization has {mass_in_interval * 100:.0f}% of probability mass between " + f"Final optimization has {(mass_in_interval if mass_in_interval.ndim < 1 else mass_in_interval[0])* 100:.0f}% of probability mass between " f"{lower} and {upper} instead of the requested {mass * 100:.0f}%.\n" "You may need to use a more flexible distribution, change the fixed parameters in the " "`fixed_params` dictionary, or provide better initial guesses." From cb7908cd9fe278196a42b899b86665015c5a3e47 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Fri, 24 Dec 2021 16:20:24 +0100 Subject: [PATCH 39/51] Split up Poisson test --- pymc/tests/test_util.py | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index 246f523a46..8d32c25b5d 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
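+# aesara is imported so the Poisson tests below can branch on the active
+# aesara.config.floatX precision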
+import aesara import numpy as np import pytest @@ -154,7 +155,6 @@ def fn(a=UNSET): # (pm.Exponential, 0, 1, {"lam": 1}, {}), # pymc gradient is failing miserably, figure out why (pm.HalfNormal, 0, 1, {"sigma": 1}, {}), (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}), - (pm.Poisson, 0, 10, {"mu": 0.5}, {}), ], ) @pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) @@ -240,3 +240,41 @@ def test_find_optim_prior_input_errors(): mass=0.95, init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])}, ) + + +distribution = pm.Poisson +lower = 0 +upper = 10 +if aesara.config.floatX == "float64": + + @pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) + def test_optim_prior_poisson64(mass): + with pytest.warns(None) as record: + opt_params = pm.find_optim_prior( + distribution, + lower=lower, + upper=upper, + mass=mass, + init_guess={"mu": 0.5}, + ) + assert len(record) == 0 + + opt_distribution = distribution.dist(**opt_params) + mass_in_interval = ( + pm.math.exp(pm.logcdf(opt_distribution, upper)) + - pm.math.exp(pm.logcdf(opt_distribution, lower)) + ).eval() + assert np.abs(mass_in_interval - mass) <= 1e-5 + +elif aesara.config.floatX == "float32": + + @pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) + def test_optim_prior_poisson32(mass): + with pytest.warns(UserWarning, match="instead of the requested 95%"): + pm.find_optim_prior( + distribution, + lower=lower, + upper=upper, + mass=0.95, + init_guess={"mu": 0.5}, + ) From 16ed43862e1e63749eb4bb1ec7f2bef17751660d Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Fri, 24 Dec 2021 19:56:12 +0100 Subject: [PATCH 40/51] Use scipy default for Exponential and reactivate tests --- pymc/func_utils.py | 16 ++++++++++------ pymc/tests/test_util.py | 9 ++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 50ff6d63f5..da276a4001 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -103,13 +103,17 @@ def find_optim_prior( cdf_error = (pm.math.exp(logcdf_upper) - pm.math.exp(logcdf_lower)) - mass cdf_error_fn = pm.aesaraf.compile_pymc([dist_params], cdf_error, allow_input_downcast=True) - try: - aesara_jac = pm.gradient(cdf_error, [dist_params]) - jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True) - # when PyMC cannot compute the gradient - # TODO: use specific gradient not implemented exception - except Exception: + # PyMC Exponential gradient is failing miserably, need to figure out why + if distribution == pm.Exponential: jac = "2-point" + else: + try: + aesara_jac = pm.gradient(cdf_error, [dist_params]) + jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True) + # when PyMC cannot compute the gradient + # TODO: use specific gradient, not implemented exception + except Exception: + jac = "2-point" opt = optimize.least_squares(cdf_error_fn, x0=list(init_guess.values()), jac=jac) if not opt.success: diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index 8d32c25b5d..b2e9f55d5b 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -152,7 +152,13 @@ def fn(a=UNSET): (pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}), (pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}), (pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}), - # (pm.Exponential, 0, 1, {"lam": 1}, {}), # pymc gradient is failing miserably, figure out why + ( + pm.Exponential, + 0, + 1, + {"lam": 1}, + {}, + ), # pymc gradient is failing miserably, figure out why (pm.HalfNormal, 0, 1, {"sigma": 1}, {}), (pm.Binomial, 0, 8, 
{"p": 0.5}, {"n": 10}), ], @@ -242,6 +248,7 @@ def test_find_optim_prior_input_errors(): ) +# test Poisson separately -- it's discrete and very hard to optimize precisely distribution = pm.Poisson lower = 0 upper = 10 From 1b84e18ee26892d10a104c3a22058fea0f7e5d08 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Fri, 24 Dec 2021 20:15:19 +0100 Subject: [PATCH 41/51] Refactor Poisson tests --- pymc/tests/test_util.py | 76 ++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 47 deletions(-) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index b2e9f55d5b..7cd34b3bf2 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import aesara import numpy as np import pytest @@ -152,13 +151,7 @@ def fn(a=UNSET): (pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}), (pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}), (pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}), - ( - pm.Exponential, - 0, - 1, - {"lam": 1}, - {}, - ), # pymc gradient is failing miserably, figure out why + (pm.Exponential, 0, 1, {"lam": 1}, {}), (pm.HalfNormal, 0, 1, {"sigma": 1}, {}), (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}), ], @@ -184,6 +177,34 @@ def test_find_optim_prior(distribution, lower, upper, init_guess, fixed_params, assert np.abs(mass_in_interval - mass) <= 1e-5 +# test Poisson separately -- hard to optimize precisely when float32 +@pytest.mark.parametrize( + "lower, upper, init_guess", + [ + (1, 15, {"mu": 10}), + (19, 41, {"mu": 30}), + ], +) +def test_optim_prior_poisson(lower, upper, init_guess): + distribution = pm.Poisson + mass = 0.95 + with pytest.warns(None) as record: + opt_params = pm.find_optim_prior( + distribution, + lower=lower, + upper=upper, + init_guess=init_guess, + ) + assert len(record) == 0 + + opt_distribution = distribution.dist(**opt_params) + mass_in_interval = ( + pm.math.exp(pm.logcdf(opt_distribution, upper)) + - pm.math.exp(pm.logcdf(opt_distribution, lower)) + ).eval() + assert np.abs(mass_in_interval - mass) <= 1e-5 + + @pytest.mark.parametrize( "distribution, lower, upper, init_guess, fixed_params", [ @@ -246,42 +267,3 @@ def test_find_optim_prior_input_errors(): mass=0.95, init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])}, ) - - -# test Poisson separately -- it's discrete and very hard to optimize precisely -distribution = pm.Poisson -lower = 0 -upper = 10 -if aesara.config.floatX == "float64": - - @pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) - def test_optim_prior_poisson64(mass): - with pytest.warns(None) as record: - opt_params = pm.find_optim_prior( - distribution, - lower=lower, - upper=upper, - mass=mass, - init_guess={"mu": 0.5}, - ) - assert len(record) == 0 - - opt_distribution = distribution.dist(**opt_params) - mass_in_interval = ( - pm.math.exp(pm.logcdf(opt_distribution, upper)) - - pm.math.exp(pm.logcdf(opt_distribution, lower)) - ).eval() - assert np.abs(mass_in_interval - mass) <= 1e-5 - -elif aesara.config.floatX == "float32": - - @pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) - def test_optim_prior_poisson32(mass): - with pytest.warns(UserWarning, match="instead of the requested 95%"): - pm.find_optim_prior( - distribution, - lower=lower, - upper=upper, - mass=0.95, - init_guess={"mu": 0.5}, - ) From 6ea7861b25e3bf9a7232e2b477194ed62e9ffe2d Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Sat, 25 Dec 2021 15:59:44 +0100 Subject: [PATCH 42/51] Reduce Poisson test 
tol to 1% for float32 --- pymc/tests/test_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index 7cd34b3bf2..2c8e15d15b 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -202,7 +202,7 @@ def test_optim_prior_poisson(lower, upper, init_guess): pm.math.exp(pm.logcdf(opt_distribution, upper)) - pm.math.exp(pm.logcdf(opt_distribution, lower)) ).eval() - assert np.abs(mass_in_interval - mass) <= 1e-5 + assert np.abs(mass_in_interval - mass) <= 1e-2 # reduce to 1% tolerance for float32 @pytest.mark.parametrize( From d63b6528a751ea4f6231d7295455f4a359757cd7 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Mon, 27 Dec 2021 12:07:28 +0100 Subject: [PATCH 43/51] Remove Exponential logic --- pymc/func_utils.py | 16 ++++++---------- pymc/tests/test_util.py | 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index da276a4001..7fa93dc7d0 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -103,17 +103,13 @@ def find_optim_prior( cdf_error = (pm.math.exp(logcdf_upper) - pm.math.exp(logcdf_lower)) - mass cdf_error_fn = pm.aesaraf.compile_pymc([dist_params], cdf_error, allow_input_downcast=True) - # PyMC Exponential gradient is failing miserably, need to figure out why - if distribution == pm.Exponential: + try: + aesara_jac = pm.gradient(cdf_error, [dist_params]) + jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True) + # when PyMC cannot compute the gradient + # TODO: use specific gradient, not implemented exception + except Exception: jac = "2-point" - else: - try: - aesara_jac = pm.gradient(cdf_error, [dist_params]) - jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True) - # when PyMC cannot compute the gradient - # TODO: use specific gradient, not implemented exception - except Exception: - jac = "2-point" opt = optimize.least_squares(cdf_error_fn, x0=list(init_guess.values()), jac=jac) if not opt.success: diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index 2c8e15d15b..bcd1ce1d48 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -151,7 +151,7 @@ def fn(a=UNSET): (pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}), (pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}), (pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}), - (pm.Exponential, 0, 1, {"lam": 1}, {}), + # (pm.Exponential, 0, 1, {"lam": 1}, {}), PyMC Exponential gradient is failing miserably, need to figure out why (pm.HalfNormal, 0, 1, {"sigma": 1}, {}), (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}), ], From 37e62511b46a565d6029594823b46b85100cc310 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Mon, 27 Dec 2021 12:15:24 +0100 Subject: [PATCH 44/51] Rename function --- RELEASE-NOTES.md | 2 +- pymc/__init__.py | 2 +- pymc/func_utils.py | 8 ++++---- pymc/tests/test_util.py | 14 +++++++------- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 5bf0a6c61c..fbc2772655 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -114,7 +114,7 @@ This includes API changes we did not warn about since at least `3.11.0` (2021-01 - Modify how particle weights are computed. This improves accuracy of the modeled function (see [5177](https://github.com/pymc-devs/pymc3/pull/5177)). - Improve sampling, increase default number of particles [5229](https://github.com/pymc-devs/pymc3/pull/5229). 
- `pm.Data` now passes additional kwargs to `aesara.shared`. [#5098](https://github.com/pymc-devs/pymc/pull/5098) -- The new `pm.find_optim_prior` function can be used to find the optimal prior parameters of a distribution under some +- The new `pm.find_constrained_prior` function can be used to find optimized prior parameters of a distribution under some constraints (e.g lower and upper bound). See [#5231](https://github.com/pymc-devs/pymc/pull/5231). diff --git a/pymc/__init__.py b/pymc/__init__.py index 3de1cae9db..a55aaf2200 100644 --- a/pymc/__init__.py +++ b/pymc/__init__.py @@ -81,7 +81,7 @@ def __set_compiler_flags(): from pymc.distributions import * from pymc.distributions import transforms from pymc.exceptions import * -from pymc.func_utils import find_optim_prior +from pymc.func_utils import find_constrained_prior from pymc.math import ( expand_packed_triangular, invlogit, diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 7fa93dc7d0..59677b6d13 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -22,10 +22,10 @@ import pymc as pm -__all__ = ["find_optim_prior"] +__all__ = ["find_constrained_prior"] -def find_optim_prior( +def find_constrained_prior( distribution: pm.Distribution, lower: float, upper: float, @@ -76,7 +76,7 @@ def find_optim_prior( # exit when any parameter is not scalar: if np.any(np.asarray(distribution.rv_op.ndims_params) != 0): raise NotImplementedError( - "`pm.find_optim_prior` does not work with non-scalar parameters yet.\n" + "`pm.find_constrained_prior` does not work with non-scalar parameters yet.\n" "Feel free to open a pull request on PyMC repo if you really need this feature." ) @@ -95,7 +95,7 @@ def find_optim_prior( logcdf_upper = pm.logcdf(dist, pm.floatX(upper)) except AttributeError: raise AttributeError( - f"You cannot use `find_optim_prior` with {distribution} -- it doesn't have a logcdf " + f"You cannot use `find_constrained_prior` with {distribution} -- it doesn't have a logcdf " "method yet.\nOpen an issue or, even better, a pull request on PyMC repo if you really " "need it." 
) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index bcd1ce1d48..86a79bf22f 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -159,7 +159,7 @@ def fn(a=UNSET): @pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) def test_find_optim_prior(distribution, lower, upper, init_guess, fixed_params, mass): with pytest.warns(None) as record: - opt_params = pm.find_optim_prior( + opt_params = pm.find_constrained_prior( distribution, lower=lower, upper=upper, @@ -189,7 +189,7 @@ def test_optim_prior_poisson(lower, upper, init_guess): distribution = pm.Poisson mass = 0.95 with pytest.warns(None) as record: - opt_params = pm.find_optim_prior( + opt_params = pm.find_constrained_prior( distribution, lower=lower, upper=upper, @@ -215,7 +215,7 @@ def test_optim_prior_poisson(lower, upper, init_guess): ) def test_find_optim_prior_error_too_large(distribution, lower, upper, init_guess, fixed_params): with pytest.warns(UserWarning, match="instead of the requested 95%"): - pm.find_optim_prior( + pm.find_constrained_prior( distribution, lower=lower, upper=upper, @@ -228,7 +228,7 @@ def test_find_optim_prior_error_too_large(distribution, lower, upper, init_guess def test_find_optim_prior_input_errors(): # missing param with pytest.raises(TypeError, match="required positional argument"): - pm.find_optim_prior( + pm.find_constrained_prior( pm.StudentT, lower=0.1, upper=0.4, @@ -238,7 +238,7 @@ def test_find_optim_prior_input_errors(): # mass too high with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): - pm.find_optim_prior( + pm.find_constrained_prior( pm.StudentT, lower=0.1, upper=0.4, @@ -249,7 +249,7 @@ def test_find_optim_prior_input_errors(): # mass too low with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): - pm.find_optim_prior( + pm.find_constrained_prior( pm.StudentT, lower=0.1, upper=0.4, @@ -260,7 +260,7 @@ def test_find_optim_prior_input_errors(): # non-scalar params with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"): - pm.find_optim_prior( + pm.find_constrained_prior( pm.MvNormal, lower=0, upper=1, From b912ac68d893fe2f35114c4d389d71eeabd1c446 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Tue, 28 Dec 2021 17:19:12 +0100 Subject: [PATCH 45/51] Refactor test functions names --- pymc/tests/test_util.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index 86a79bf22f..c7fecc97b6 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -157,7 +157,7 @@ def fn(a=UNSET): ], ) @pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) -def test_find_optim_prior(distribution, lower, upper, init_guess, fixed_params, mass): +def test_find_constrained_prior(distribution, lower, upper, init_guess, fixed_params, mass): with pytest.warns(None) as record: opt_params = pm.find_constrained_prior( distribution, @@ -185,7 +185,7 @@ def test_find_optim_prior(distribution, lower, upper, init_guess, fixed_params, (19, 41, {"mu": 30}), ], ) -def test_optim_prior_poisson(lower, upper, init_guess): +def test_constrained_prior_poisson(lower, upper, init_guess): distribution = pm.Poisson mass = 0.95 with pytest.warns(None) as record: @@ -213,7 +213,9 @@ def test_optim_prior_poisson(lower, upper, init_guess): (pm.Binomial, 0, 2, {"p": 0.8}, {"n": 10}), ], ) -def test_find_optim_prior_error_too_large(distribution, lower, upper, init_guess, fixed_params): +def test_find_constrained_prior_error_too_large( + 
distribution, lower, upper, init_guess, fixed_params +): with pytest.warns(UserWarning, match="instead of the requested 95%"): pm.find_constrained_prior( distribution, @@ -225,7 +227,7 @@ def test_find_optim_prior_error_too_large(distribution, lower, upper, init_guess ) -def test_find_optim_prior_input_errors(): +def test_find_constrained_prior_input_errors(): # missing param with pytest.raises(TypeError, match="required positional argument"): pm.find_constrained_prior( From d4bce39c33776ab64211c15857e0aea67e2d1cd8 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Thu, 30 Dec 2021 13:36:52 +0100 Subject: [PATCH 46/51] Use more precise exception for gradient --- pymc/func_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 59677b6d13..26adf1b16d 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -18,6 +18,7 @@ import aesara.tensor as aet import numpy as np +from aesara.gradient import NullTypeGradError from scipy import optimize import pymc as pm @@ -107,8 +108,7 @@ def find_constrained_prior( aesara_jac = pm.gradient(cdf_error, [dist_params]) jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True) # when PyMC cannot compute the gradient - # TODO: use specific gradient, not implemented exception - except Exception: + except (NotImplementedError, NullTypeGradError, TypeError): jac = "2-point" opt = optimize.least_squares(cdf_error_fn, x0=list(init_guess.values()), jac=jac) From 9a512898b20ef4728c0b3c9180bb594ec68640a8 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Mon, 3 Jan 2022 17:19:09 +0100 Subject: [PATCH 47/51] Don't catch TypeError --- pymc/func_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 26adf1b16d..5a23a8b866 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -108,7 +108,7 @@ def find_constrained_prior( aesara_jac = pm.gradient(cdf_error, [dist_params]) jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True) # when PyMC cannot compute the gradient - except (NotImplementedError, NullTypeGradError, TypeError): + except (NotImplementedError, NullTypeGradError): jac = "2-point" opt = optimize.least_squares(cdf_error_fn, x0=list(init_guess.values()), jac=jac) From 8b9ae6e0f6b802ca0c9db3f9be044f7a701ca26d Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Mon, 3 Jan 2022 17:37:07 +0100 Subject: [PATCH 48/51] Remove specific Poisson test --- pymc/tests/test_util.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index c7fecc97b6..4640c255ae 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -154,6 +154,8 @@ def fn(a=UNSET): # (pm.Exponential, 0, 1, {"lam": 1}, {}), PyMC Exponential gradient is failing miserably, need to figure out why (pm.HalfNormal, 0, 1, {"sigma": 1}, {}), (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}), + (pm.Poisson, 1, 15, {"mu": 10}, {}), + (pm.Poisson, 19, 41, {"mu": 30}, {}), ], ) @pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) @@ -185,26 +187,6 @@ def test_find_constrained_prior(distribution, lower, upper, init_guess, fixed_pa (19, 41, {"mu": 30}), ], ) -def test_constrained_prior_poisson(lower, upper, init_guess): - distribution = pm.Poisson - mass = 0.95 - with pytest.warns(None) as record: - opt_params = pm.find_constrained_prior( - distribution, - lower=lower, - upper=upper, - init_guess=init_guess, - ) - assert len(record) == 0 - - 
opt_distribution = distribution.dist(**opt_params) - mass_in_interval = ( - pm.math.exp(pm.logcdf(opt_distribution, upper)) - - pm.math.exp(pm.logcdf(opt_distribution, lower)) - ).eval() - assert np.abs(mass_in_interval - mass) <= 1e-2 # reduce to 1% tolerance for float32 - - @pytest.mark.parametrize( "distribution, lower, upper, init_guess, fixed_params", [ From d53154adc77216a5ca9e48c6d9010f003d50cde1 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Mon, 3 Jan 2022 18:42:44 +0100 Subject: [PATCH 49/51] Remove typo from old Poisson test --- pymc/tests/test_util.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index 4640c255ae..4882cb1709 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -179,14 +179,6 @@ def test_find_constrained_prior(distribution, lower, upper, init_guess, fixed_pa assert np.abs(mass_in_interval - mass) <= 1e-5 -# test Poisson separately -- hard to optimize precisely when float32 -@pytest.mark.parametrize( - "lower, upper, init_guess", - [ - (1, 15, {"mu": 10}), - (19, 41, {"mu": 30}), - ], -) @pytest.mark.parametrize( "distribution, lower, upper, init_guess, fixed_params", [ From 1f42835b55cef55db1a814d8dc4b4b757e751816 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Mon, 3 Jan 2022 19:08:42 +0100 Subject: [PATCH 50/51] Put tests for constrained priors into their own file --- pymc/tests/test_func_utils.py | 120 ++++++++++++++++++++++++++++++++++ pymc/tests/test_util.py | 101 ---------------------------- 2 files changed, 120 insertions(+), 101 deletions(-) create mode 100644 pymc/tests/test_func_utils.py diff --git a/pymc/tests/test_func_utils.py b/pymc/tests/test_func_utils.py new file mode 100644 index 0000000000..d57e7212fa --- /dev/null +++ b/pymc/tests/test_func_utils.py @@ -0,0 +1,120 @@ +# Copyright 2020 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
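+
+# Tests for pm.find_constrained_prior, moved out of test_util.py: the
+# parametrized cases check that the optimized parameters place the requested
+# probability mass between `lower` and `upper`; the remaining tests cover the
+# too-constraining warning and the input-validation errors.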
+ +import numpy as np +import pytest + +import pymc as pm + + +@pytest.mark.parametrize( + "distribution, lower, upper, init_guess, fixed_params", + [ + (pm.Gamma, 0.1, 0.4, {"alpha": 1, "beta": 10}, {}), + (pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}), + (pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}), + (pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}), + # (pm.Exponential, 0, 1, {"lam": 1}, {}), PyMC Exponential gradient is failing miserably, + # need to figure out why + (pm.HalfNormal, 0, 1, {"sigma": 1}, {}), + (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}), + (pm.Poisson, 1, 15, {"mu": 10}, {}), + (pm.Poisson, 19, 41, {"mu": 30}, {}), + ], +) +@pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) +def test_find_constrained_prior(distribution, lower, upper, init_guess, fixed_params, mass): + with pytest.warns(None) as record: + opt_params = pm.find_constrained_prior( + distribution, + lower=lower, + upper=upper, + mass=mass, + init_guess=init_guess, + fixed_params=fixed_params, + ) + assert len(record) == 0 + + opt_distribution = distribution.dist(**opt_params) + mass_in_interval = ( + pm.math.exp(pm.logcdf(opt_distribution, upper)) + - pm.math.exp(pm.logcdf(opt_distribution, lower)) + ).eval() + assert np.abs(mass_in_interval - mass) <= 1e-5 + + +@pytest.mark.parametrize( + "distribution, lower, upper, init_guess, fixed_params", + [ + (pm.Gamma, 0.1, 0.4, {"alpha": 1}, {"beta": 10}), + (pm.Exponential, 0.1, 1, {"lam": 1}, {}), + (pm.Binomial, 0, 2, {"p": 0.8}, {"n": 10}), + ], +) +def test_find_constrained_prior_error_too_large( + distribution, lower, upper, init_guess, fixed_params +): + with pytest.warns(UserWarning, match="instead of the requested 95%"): + pm.find_constrained_prior( + distribution, + lower=lower, + upper=upper, + mass=0.95, + init_guess=init_guess, + fixed_params=fixed_params, + ) + + +def test_find_constrained_prior_input_errors(): + # missing param + with pytest.raises(TypeError, match="required positional argument"): + pm.find_constrained_prior( + pm.StudentT, + lower=0.1, + upper=0.4, + mass=0.95, + init_guess={"mu": 170, "sigma": 3}, + ) + + # mass too high + with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): + pm.find_constrained_prior( + pm.StudentT, + lower=0.1, + upper=0.4, + mass=0.995, + init_guess={"mu": 170, "sigma": 3}, + fixed_params={"nu": 7}, + ) + + # mass too low + with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): + pm.find_constrained_prior( + pm.StudentT, + lower=0.1, + upper=0.4, + mass=0.005, + init_guess={"mu": 170, "sigma": 3}, + fixed_params={"nu": 7}, + ) + + # non-scalar params + with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"): + pm.find_constrained_prior( + pm.MvNormal, + lower=0, + upper=1, + mass=0.95, + init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])}, + ) diff --git a/pymc/tests/test_util.py b/pymc/tests/test_util.py index 4882cb1709..c0500d3b97 100644 --- a/pymc/tests/test_util.py +++ b/pymc/tests/test_util.py @@ -142,104 +142,3 @@ def fn(a=UNSET): help(fn) captured = capsys.readouterr() assert "a=UNSET" in captured.out - - -@pytest.mark.parametrize( - "distribution, lower, upper, init_guess, fixed_params", - [ - (pm.Gamma, 0.1, 0.4, {"alpha": 1, "beta": 10}, {}), - (pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}), - (pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}), - (pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}), - # (pm.Exponential, 0, 1, {"lam": 1}, {}), PyMC Exponential gradient 
is failing miserably, need to figure out why - (pm.HalfNormal, 0, 1, {"sigma": 1}, {}), - (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}), - (pm.Poisson, 1, 15, {"mu": 10}, {}), - (pm.Poisson, 19, 41, {"mu": 30}, {}), - ], -) -@pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) -def test_find_constrained_prior(distribution, lower, upper, init_guess, fixed_params, mass): - with pytest.warns(None) as record: - opt_params = pm.find_constrained_prior( - distribution, - lower=lower, - upper=upper, - mass=mass, - init_guess=init_guess, - fixed_params=fixed_params, - ) - assert len(record) == 0 - - opt_distribution = distribution.dist(**opt_params) - mass_in_interval = ( - pm.math.exp(pm.logcdf(opt_distribution, upper)) - - pm.math.exp(pm.logcdf(opt_distribution, lower)) - ).eval() - assert np.abs(mass_in_interval - mass) <= 1e-5 - - -@pytest.mark.parametrize( - "distribution, lower, upper, init_guess, fixed_params", - [ - (pm.Gamma, 0.1, 0.4, {"alpha": 1}, {"beta": 10}), - (pm.Exponential, 0.1, 1, {"lam": 1}, {}), - (pm.Binomial, 0, 2, {"p": 0.8}, {"n": 10}), - ], -) -def test_find_constrained_prior_error_too_large( - distribution, lower, upper, init_guess, fixed_params -): - with pytest.warns(UserWarning, match="instead of the requested 95%"): - pm.find_constrained_prior( - distribution, - lower=lower, - upper=upper, - mass=0.95, - init_guess=init_guess, - fixed_params=fixed_params, - ) - - -def test_find_constrained_prior_input_errors(): - # missing param - with pytest.raises(TypeError, match="required positional argument"): - pm.find_constrained_prior( - pm.StudentT, - lower=0.1, - upper=0.4, - mass=0.95, - init_guess={"mu": 170, "sigma": 3}, - ) - - # mass too high - with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): - pm.find_constrained_prior( - pm.StudentT, - lower=0.1, - upper=0.4, - mass=0.995, - init_guess={"mu": 170, "sigma": 3}, - fixed_params={"nu": 7}, - ) - - # mass too low - with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): - pm.find_constrained_prior( - pm.StudentT, - lower=0.1, - upper=0.4, - mass=0.005, - init_guess={"mu": 170, "sigma": 3}, - fixed_params={"nu": 7}, - ) - - # non-scalar params - with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"): - pm.find_constrained_prior( - pm.MvNormal, - lower=0, - upper=1, - mass=0.95, - init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])}, - ) From bad236ccd3d3dfff130a9752d5b6cb09090739ff Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Tue, 4 Jan 2022 13:57:12 +0100 Subject: [PATCH 51/51] Add code examples in docstrings --- pymc/func_utils.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 5a23a8b866..63af83d305 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -68,6 +68,31 @@ def find_constrained_prior( ------- The optimized distribution parameters as a dictionary with the parameters' name as key and the optimized value as value. + + Examples + -------- + .. 
code-block:: python + + # get parameters obeying constraints + opt_params = pm.find_constrained_prior( + pm.Gamma, lower=0.1, upper=0.4, mass=0.75, init_guess={"alpha": 1, "beta": 10} + ) + + # use these parameters to draw random samples + samples = pm.Gamma.dist(**opt_params, size=100).eval() + + # use these parameters in a model + with pm.Model(): + x = pm.Gamma('x', **opt_params) + + # specify fixed values before optimization + opt_params = pm.find_constrained_prior( + pm.StudentT, + lower=0, + upper=1, + init_guess={"mu": 5, "sigma": 2}, + fixed_params={"nu": 7}, + ) """ assert 0.01 <= mass <= 0.99, ( "This function optimizes the mass of the given distribution +/- "