From 928959906a570cbfcf7519eab0299c65811bf9ac Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 29 Dec 2020 16:26:39 +0100 Subject: [PATCH 1/9] Change `check_logcdf` to test that values below or above domain are properly evaluated. Fix issues with `Uniform`, `HalfNormal`, `Gamma`, and `InverseGamma` distributions. --- pymc3/distributions/continuous.py | 34 +++++++++++++++++++++++-------- pymc3/tests/test_distributions.py | 22 ++++++++++++++++++-- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 08102d2022..6179294146 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -286,13 +286,16 @@ def logcdf(self, value): ------- TensorVariable """ + lower = self.lower + upper = self.upper + return tt.switch( - tt.or_(tt.lt(value, self.lower), tt.gt(value, self.upper)), + tt.or_(tt.lt(value, lower), tt.lt(upper, lower)), -np.inf, tt.switch( - tt.eq(value, self.upper), + tt.lt(value, upper), + tt.log(value - lower) - tt.log(upper - lower), 0, - tt.log(value - self.lower) - tt.log(self.upper - self.lower), ), ) @@ -910,10 +913,10 @@ def logcdf(self, value): """ sigma = self.sigma z = zvalue(value, mu=0, sigma=sigma) - return tt.switch( - tt.lt(z, -1.0), - tt.log(tt.erfcx(-z / tt.sqrt(2.0))) - tt.sqr(z), + return bound( tt.log1p(-tt.erfc(z / tt.sqrt(2.0))), + 0 <= value, + 0 < sigma, ) @@ -2478,7 +2481,17 @@ def logcdf(self, value): """ alpha = self.alpha beta = self.beta - return bound(tt.log(tt.gammainc(alpha, beta * value)), value >= 0, alpha > 0, beta > 0) + # To avoid issue with #4340 + safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha) + safe_beta = tt.switch(tt.lt(beta, 0), 0, beta) + safe_value = tt.switch(tt.lt(value, 0), 0, value) + + return bound( + tt.log(tt.gammainc(safe_alpha, safe_beta * safe_value)), + value >= 0, + alpha > 0, + beta > 0, + ) def _distr_parameters_for_repr(self): return ["alpha", "beta"] @@ -2642,8 +2655,13 @@ def logcdf(self, value): """ alpha = self.alpha beta = self.beta + # To avoid issue with #4340 + safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha) + safe_beta = tt.switch(tt.lt(beta, 0), 0, beta) + safe_value = tt.switch(tt.lt(value, 0), 0, value) + return bound( - tt.log(tt.gammaincc(alpha, beta / value)), + tt.log(tt.gammaincc(safe_alpha, safe_beta / safe_value)), value >= 0, alpha > 0, beta > 0, diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 9a21e00c15..36bf3f2aee 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -597,6 +597,24 @@ def check_selfconsistency_discrete_logcdf( err_msg=str(pt), ) + # Test that values below domain evaluate to -np.inf + if np.isfinite(domain.lower): + below_domain = domain.lower - 1 + assert_equal( + dist.logcdf(below_domain).tag.test_value, + -np.inf, + err_msg=str(below_domain), + ) + + # Test that values above domain evaluate to 0 + if np.isfinite(domain.upper): + above_domain = domain.upper + 1 + assert_equal( + dist.logcdf(above_domain).tag.test_value, + 0, + err_msg=str(above_domain), + ) + def check_int_to_1(self, model, value, domain, paramdomains): pdf = model.fastfn(exp(model.logpt)) for pt in product(paramdomains, n_samples=10): @@ -681,7 +699,7 @@ def test_flat(self): with Model(): x = Flat("a") assert_allclose(x.tag.test_value, 0) - self.check_logcdf(Flat, Runif, {}, lambda value: np.log(0.5)) + self.check_logcdf(Flat, R, {}, lambda value: np.log(0.5)) # Check infinite cases individually. assert 0.0 == Flat.dist().logcdf(np.inf).tag.test_value assert -np.inf == Flat.dist().logcdf(-np.inf).tag.test_value @@ -692,7 +710,7 @@ def test_half_flat(self): x = HalfFlat("a", shape=2) assert_allclose(x.tag.test_value, 1) assert x.tag.test_value.shape == (2,) - self.check_logcdf(HalfFlat, Runif, {}, lambda value: -np.inf) + self.check_logcdf(HalfFlat, Rplus, {}, lambda value: -np.inf) # Check infinite cases individually. assert 0.0 == HalfFlat.dist().logcdf(np.inf).tag.test_value assert -np.inf == HalfFlat.dist().logcdf(-np.inf).tag.test_value From 07f09ad6ee4d94f6e501c4a54f17b460832ee9fd Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 31 Dec 2020 13:11:27 +0100 Subject: [PATCH 2/9] Add multiple value test for logcdf. Add more informative comment for Gamma and InverseGamma hack. Update Release note. --- RELEASE-NOTES.md | 1 + pymc3/distributions/continuous.py | 63 ++++++++++++++++++++++--------- pymc3/tests/test_distributions.py | 9 +++++ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 35ae968c54..4732a96889 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -29,6 +29,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang - Fixed mathematical formulation in `MvStudentT` random method. (see [#4359](https://github.com/pymc-devs/pymc3/pull/4359)) - Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367)) - Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368)) +- Fix issue in `logcdf` methods of `Uniform`, `HalfNormal`, `Gamma` and `InverseGamma`. These functions now return correct values when evaluated with invalid parameters or values (see [4393](https://github.com/pymc-devs/pymc3/pull/4393)) ## PyMC3 3.10.0 (7 December 2020) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 6179294146..51a5be05b0 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -290,7 +290,7 @@ def logcdf(self, value): upper = self.upper return tt.switch( - tt.or_(tt.lt(value, lower), tt.lt(upper, lower)), + tt.lt(value, lower) | tt.lt(upper, lower), -np.inf, tt.switch( tt.lt(value, upper), @@ -1307,13 +1307,26 @@ def logcdf(self, value): ------- TensorVariable """ - value = floatX(tt.as_tensor(value)) - a = floatX(tt.as_tensor(self.alpha)) - b = floatX(tt.as_tensor(self.beta)) - return tt.switch( - tt.le(value, 0), - -np.inf, - tt.switch(tt.ge(value, 1), 0, tt.log(incomplete_beta(a, b, value))), + # incomplete_beta function can only handle scalar values (see #4342) + if np.ndim(value): + raise TypeError( + "Beta.logcdf expects a scalar value but received a {}-dimensional object.".format( + np.ndim(value) + ) + ) + + a = self.alpha + b = self.beta + + return bound( + tt.switch( + tt.lt(value, 1), + tt.log(incomplete_beta(a, b, value)), + 0 + ), + 0 <= value, + 0 < a, + 0 < b, ) def _distr_parameters_for_repr(self): @@ -1965,13 +1978,28 @@ def logcdf(self, value): ------- TensorVariable """ + # incomplete_beta function can only handle scalar values (see #4342) + if np.ndim(value): + raise TypeError( + "StudentT.logcdf expects a scalar value but received a {}-dimensional object.".format( + np.ndim(value) + ) + ) + nu = self.nu mu = self.mu sigma = self.sigma + lam = self.lam t = (value - mu) / sigma sqrt_t2_nu = tt.sqrt(t ** 2 + nu) x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu) - return tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x)) + + return bound( + tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x)), + 0 < nu, + 0 < sigma, + 0 < lam, + ) class Pareto(Continuous): @@ -2481,16 +2509,16 @@ def logcdf(self, value): """ alpha = self.alpha beta = self.beta - # To avoid issue with #4340 + # To avoid gammainc C-assertion when given invalid values (#4340) safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha) safe_beta = tt.switch(tt.lt(beta, 0), 0, beta) safe_value = tt.switch(tt.lt(value, 0), 0, value) return bound( tt.log(tt.gammainc(safe_alpha, safe_beta * safe_value)), - value >= 0, - alpha > 0, - beta > 0, + 0 <= value, + 0 < alpha, + 0 < beta, ) def _distr_parameters_for_repr(self): @@ -2655,16 +2683,16 @@ def logcdf(self, value): """ alpha = self.alpha beta = self.beta - # To avoid issue with #4340 + # To avoid gammaincc C-assertion when given invalid values (#4340) safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha) safe_beta = tt.switch(tt.lt(beta, 0), 0, beta) safe_value = tt.switch(tt.lt(value, 0), 0, value) return bound( tt.log(tt.gammaincc(safe_alpha, safe_beta / safe_value)), - value >= 0, - alpha > 0, - beta > 0, + 0 <= value, + 0 < alpha, + 0 < beta, ) @@ -3532,6 +3560,7 @@ def logcdf(self, value): l = self.lower u = self.upper c = self.c + return tt.switch( tt.le(value, l), -np.inf, diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 36bf3f2aee..f0c4dd9dc8 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -615,6 +615,15 @@ def check_selfconsistency_discrete_logcdf( err_msg=str(above_domain), ) + # TODO: Test that logcdf wih invalid parameters is always evaluated to -inf + + # Test that method works with multiple values or raises informative TypeError + try: + dist.logcdf(np.array([value, value])).tag.test_value + except TypeError as err: + if not str(err).endswith(".logcdf expects a scalar value but received a 1-dimensional object."): + raise + def check_int_to_1(self, model, value, domain, paramdomains): pdf = model.fastfn(exp(model.logpt)) for pt in product(paramdomains, n_samples=10): From b91d1c120b41b8bfb7658bc87b569c5a1e272423 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sat, 2 Jan 2021 12:29:00 +0100 Subject: [PATCH 3/9] Update release note --- RELEASE-NOTES.md | 2 +- pymc3/distributions/continuous.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 4732a96889..af88f7bd5b 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -29,7 +29,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang - Fixed mathematical formulation in `MvStudentT` random method. (see [#4359](https://github.com/pymc-devs/pymc3/pull/4359)) - Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367)) - Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368)) -- Fix issue in `logcdf` methods of `Uniform`, `HalfNormal`, `Gamma` and `InverseGamma`. These functions now return correct values when evaluated with invalid parameters or values (see [4393](https://github.com/pymc-devs/pymc3/pull/4393)) +- Update the `logcdf` method of several continuous distributions to return -inf for invalid parameters and values, and raise informative errors when multiple values cannot be evaluated. (see [4393](https://github.com/pymc-devs/pymc3/pull/4393)) ## PyMC3 3.10.0 (7 December 2020) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 51a5be05b0..25b42886fb 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -3560,7 +3560,6 @@ def logcdf(self, value): l = self.lower u = self.upper c = self.c - return tt.switch( tt.le(value, l), -np.inf, From d0316b550f700070d66cbbbd6479ea3757030a5e Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sat, 2 Jan 2021 12:37:09 +0100 Subject: [PATCH 4/9] Update docstrings with valid value types --- pymc3/distributions/continuous.py | 48 +++++++++++++++---------------- pymc3/distributions/discrete.py | 12 ++++---- pymc3/tests/test_distributions.py | 4 ++- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 25b42886fb..aaeff90a87 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -278,7 +278,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -347,7 +347,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -404,7 +404,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -545,7 +545,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -903,7 +903,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -1109,7 +1109,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -1300,8 +1300,7 @@ def logcdf(self, value): Parameters ---------- value: numeric - Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + Value(s) for which log CDF is calculated. Returns ------- @@ -1322,7 +1321,7 @@ def logcdf(self, value): tt.switch( tt.lt(value, 1), tt.log(incomplete_beta(a, b, value)), - 0 + 0, ), 0 <= value, 0 < a, @@ -1537,7 +1536,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -1652,7 +1651,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -1808,7 +1807,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -1971,8 +1970,7 @@ def logcdf(self, value): Parameters ---------- value: numeric - Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + Value(s) for which log CDF is calculated. Returns ------- @@ -2121,7 +2119,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -2240,7 +2238,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -2348,7 +2346,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -2499,7 +2497,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -2673,7 +2671,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -2860,7 +2858,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -3160,7 +3158,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -3549,7 +3547,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -3678,7 +3676,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -3960,7 +3958,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -4302,7 +4300,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index d1dc858f9c..23d56e0b53 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -463,7 +463,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -602,7 +602,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -717,7 +717,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -1017,7 +1017,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -1288,7 +1288,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. @@ -1601,7 +1601,7 @@ def logcdf(self, value): Parameters ---------- - value: numeric + value: numeric or np.ndarray or theano.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple values are desired the values must be provided in a numpy array or theano tensor. diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index f0c4dd9dc8..3984d8bfc8 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -621,7 +621,9 @@ def check_selfconsistency_discrete_logcdf( try: dist.logcdf(np.array([value, value])).tag.test_value except TypeError as err: - if not str(err).endswith(".logcdf expects a scalar value but received a 1-dimensional object."): + if not str(err).endswith( + ".logcdf expects a scalar value but received a 1-dimensional object." + ): raise def check_int_to_1(self, model, value, domain, paramdomains): From 335b7ebe1926d0c59a33eb7ae0263e2f4685b97e Mon Sep 17 00:00:00 2001 From: ricardoV94 <28983449+ricardoV94@users.noreply.github.com> Date: Sat, 2 Jan 2021 12:59:19 +0100 Subject: [PATCH 5/9] Update RELEASE-NOTES.md --- RELEASE-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index af88f7bd5b..ebe696e824 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -29,7 +29,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang - Fixed mathematical formulation in `MvStudentT` random method. (see [#4359](https://github.com/pymc-devs/pymc3/pull/4359)) - Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367)) - Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368)) -- Update the `logcdf` method of several continuous distributions to return -inf for invalid parameters and values, and raise informative errors when multiple values cannot be evaluated. (see [4393](https://github.com/pymc-devs/pymc3/pull/4393)) +- Update the `logcdf` method of several continuous distributions to return -inf for invalid parameters and values, and raise an informative error when multiple values cannot be evaluated in a single call. (see [4393](https://github.com/pymc-devs/pymc3/pull/4393)) ## PyMC3 3.10.0 (7 December 2020) From 4fa834a3924af4fc4644fac6bca06c7a745f56f7 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sat, 2 Jan 2021 14:37:15 +0100 Subject: [PATCH 6/9] Add more informative comments and remove TODO --- pymc3/distributions/continuous.py | 4 ++-- pymc3/distributions/discrete.py | 2 +- pymc3/tests/test_distributions.py | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index aaeff90a87..4769668d74 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -2507,7 +2507,7 @@ def logcdf(self, value): """ alpha = self.alpha beta = self.beta - # To avoid gammainc C-assertion when given invalid values (#4340) + # Avoid C-assertion when the gammainc function is called with invalid values (#4340) safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha) safe_beta = tt.switch(tt.lt(beta, 0), 0, beta) safe_value = tt.switch(tt.lt(value, 0), 0, value) @@ -2681,7 +2681,7 @@ def logcdf(self, value): """ alpha = self.alpha beta = self.beta - # To avoid gammaincc C-assertion when given invalid values (#4340) + # Avoid C-assertion when the gammaincc function is called with invalid values (#4340) safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha) safe_beta = tt.switch(tt.lt(beta, 0), 0, beta) safe_value = tt.switch(tt.lt(value, 0), 0, value) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 23d56e0b53..7c5e476fa6 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -727,7 +727,7 @@ def logcdf(self, value): """ mu = self.mu value = tt.floor(value) - # To avoid gammaincc C-assertion when given invalid values (#4340) + # Avoid C-assertion when the gammaincc function is called with invalid values (#4340) safe_mu = tt.switch(tt.lt(mu, 0), 0, mu) safe_value = tt.switch(tt.lt(value, 0), 0, value) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 3984d8bfc8..3c26b8651b 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -615,8 +615,6 @@ def check_selfconsistency_discrete_logcdf( err_msg=str(above_domain), ) - # TODO: Test that logcdf wih invalid parameters is always evaluated to -inf - # Test that method works with multiple values or raises informative TypeError try: dist.logcdf(np.array([value, value])).tag.test_value From 7c3fb122f3696c43aa70cefd86d1485bb4f9f804 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sat, 2 Jan 2021 14:49:39 +0100 Subject: [PATCH 7/9] TypeError: format -> f-strings --- pymc3/distributions/continuous.py | 8 ++------ pymc3/distributions/discrete.py | 24 ++++++------------------ 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 4769668d74..5653c2fd15 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1309,9 +1309,7 @@ def logcdf(self, value): # incomplete_beta function can only handle scalar values (see #4342) if np.ndim(value): raise TypeError( - "Beta.logcdf expects a scalar value but received a {}-dimensional object.".format( - np.ndim(value) - ) + f"Beta.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) a = self.alpha @@ -1979,9 +1977,7 @@ def logcdf(self, value): # incomplete_beta function can only handle scalar values (see #4342) if np.ndim(value): raise TypeError( - "StudentT.logcdf expects a scalar value but received a {}-dimensional object.".format( - np.ndim(value) - ) + f"StudentT.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) nu = self.nu diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 7c5e476fa6..5276334964 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -166,9 +166,7 @@ def logcdf(self, value): # incomplete_beta function can only handle scalar values (see #4342) if np.ndim(value): raise TypeError( - "Binomial.logcdf expects a scalar value but received a {}-dimensional object.".format( - np.ndim(value) - ) + f"Binomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) n = self.n @@ -336,9 +334,7 @@ def logcdf(self, value): # logcdf can only handle scalar values at the moment if np.ndim(value): raise TypeError( - "BetaBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format( - np.ndim(value) - ) + f"BetaBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) alpha = self.alpha @@ -910,9 +906,7 @@ def logcdf(self, value): # incomplete_beta function can only handle scalar values (see #4342) if np.ndim(value): raise TypeError( - "NegativeBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format( - np.ndim(value) - ) + f"NegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) # TODO: avoid `p` recomputation if distribution was defined in terms of `p` @@ -1166,9 +1160,7 @@ def logcdf(self, value): # logcdf can only handle scalar values at the moment if np.ndim(value): raise TypeError( - "BetaBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format( - np.ndim(value) - ) + f"HyperGeometric.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) # TODO: Use lower upper in locgdf for smarter logsumexp? @@ -1743,9 +1735,7 @@ def logcdf(self, value): # logcdf can only handle scalar values due to limitation in Binomial.logcdf if np.ndim(value): raise TypeError( - "ZeroInflatedBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format( - np.ndim(value) - ) + f"ZeroInflatedBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) psi = self.psi @@ -1913,9 +1903,7 @@ def logcdf(self, value): # logcdf can only handle scalar values due to limitation in NegativeBinomial.logcdf if np.ndim(value): raise TypeError( - "ZeroInflatedNegativeBinomial.logcdf expects a scalar value but received a {}-dimensional object.".format( - np.ndim(value) - ) + f"ZeroInflatedNegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) psi = self.psi From 8c51062a728ced3c99a00339a6ef8ace901add02 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sat, 2 Jan 2021 18:03:30 +0100 Subject: [PATCH 8/9] Ignore finite upper limit in Nat domains. Move new checks to `check_logcdf`. --- pymc3/tests/test_distributions.py | 48 ++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 3c26b8651b..a2d00a2d60 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -575,28 +575,6 @@ def check_logcdf( err_msg=str(pt), ) - def check_selfconsistency_discrete_logcdf( - self, distribution, domain, paramdomains, decimal=None, n_samples=100 - ): - """ - Check that logcdf of discrete distributions matches sum of logps up to value - """ - domains = paramdomains.copy() - domains["value"] = domain - if decimal is None: - decimal = select_by_precision(float64=6, float32=3) - for pt in product(domains, n_samples=n_samples): - params = dict(pt) - value = params.pop("value") - values = np.arange(domain.lower, value + 1) - dist = distribution.dist(**params) - assert_almost_equal( - dist.logcdf(value).tag.test_value, - logsumexp(dist.logp(values), keepdims=False).tag.test_value, - decimal=decimal, - err_msg=str(pt), - ) - # Test that values below domain evaluate to -np.inf if np.isfinite(domain.lower): below_domain = domain.lower - 1 @@ -607,7 +585,9 @@ def check_selfconsistency_discrete_logcdf( ) # Test that values above domain evaluate to 0 - if np.isfinite(domain.upper): + # Natural domains do not have inf as the upper edge, but should also be ignored + not_nat_domain = domain not in (NatSmall, Nat, NatBig, PosNat) + if not_nat_domain and np.isfinite(domain.upper): above_domain = domain.upper + 1 assert_equal( dist.logcdf(above_domain).tag.test_value, @@ -624,6 +604,28 @@ def check_selfconsistency_discrete_logcdf( ): raise + def check_selfconsistency_discrete_logcdf( + self, distribution, domain, paramdomains, decimal=None, n_samples=100 + ): + """ + Check that logcdf of discrete distributions matches sum of logps up to value + """ + domains = paramdomains.copy() + domains["value"] = domain + if decimal is None: + decimal = select_by_precision(float64=6, float32=3) + for pt in product(domains, n_samples=n_samples): + params = dict(pt) + value = params.pop("value") + values = np.arange(domain.lower, value + 1) + dist = distribution.dist(**params) + assert_almost_equal( + dist.logcdf(value).tag.test_value, + logsumexp(dist.logp(values), keepdims=False).tag.test_value, + decimal=decimal, + err_msg=str(pt), + ) + def check_int_to_1(self, model, value, domain, paramdomains): pdf = model.fastfn(exp(model.logpt)) for pt in product(paramdomains, n_samples=10): From 4929b64c944f45b9d324283454666a41f29a92aa Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sat, 2 Jan 2021 19:32:28 +0100 Subject: [PATCH 9/9] Use `tt.switch` in `DiscreteUniform` for hard boundary (addresses previously failing test in 32bit OS) --- pymc3/distributions/discrete.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 5276334964..1710b12440 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1292,7 +1292,11 @@ def logcdf(self, value): lower = self.lower return bound( - tt.log(tt.minimum(tt.floor(value), upper) - lower + 1) - tt.log(upper - lower + 1), + tt.switch( + tt.lt(value, upper), + tt.log(tt.minimum(tt.floor(value), upper) - lower + 1) - tt.log(upper - lower + 1), + 0, + ), lower <= value, lower <= upper, )