Break down tests having multiple checks and xfail decorated (#4497)

matteo-pallini · ricardoV94 · web-flow · commit d248a0e94d67 · 2021-03-09T06:41:51.000+01:00
* Break down tests having multiple checks and xfail decorated

Xfail is going to care only about the first failed assert statement and
ignore all the other ones in the same test.

Sometimes xfail is being used to decorate a test or class function calling
several other sub-functions. This makes it hard to monitor what is happening
for the sub-fuctions that are not failing (given that the failure will take
priority and mark the test function as xfailed). By breaking down each
function into individual sub-functions it's easier to monitor individual
tests behavior

* Remove xfail which seems unnecessary

Also add comment regarding test with unpredictable outcome

* Break down test with multiple sub-checks and xfail where appropriate

* Remove Scipy Xfail from tests not using sp.betabinom

* Add back xfail to potentially failing test when float32 is used

* Temporatily add n_samples=-1 to check tests behavior for all possible parameters

* Remove temporary n_samples and a couple more to do some quick tests

* Remove unnecessary comments

* Replace Xfail with skipif when using Scipy version as condition

Given that betabinom won't exist in previous versions, there should be
no chance for it's behavior to change, given that it won't exist there
is no point in monitoring the outcome of these tests, so it may be worth
skipping them. Skipping them should be faster than running them and
catching the failures

* Add missing n_samples for test_beta_binomial_selfconsistency

* Remove all the n_samples=-1 added for verifying tests robustness

* Tweak normal_logcdf and moyal tests

Temporarily set n_samples=-1 to test all permutations.

* Reintroduce 32bit xfail on Moyal logcdf, revert n_samples and merge passing normal tests

* Small renaming and reordering

* Reintroduce accidentally removed n_samples from binomial test

Co-authored-by: Ricardo &lt;ricardo.vieira1994@gmail.com&gt;
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -770,9 +770,9 @@ def check_selfconsistency_discrete_logcdf(
                 err_msg=str(pt),
             )
 
-    def check_int_to_1(self, model, value, domain, paramdomains):
+    def check_int_to_1(self, model, value, domain, paramdomains, n_samples=10):
         pdf = model.fastfn(exp(model.logpt))
-        for pt in product(paramdomains, n_samples=10):
+        for pt in product(paramdomains, n_samples=n_samples):
             pt = Point(pt, value=value.tag.test_value, model=model)
             bij = DictToVarBijection(value, (), pt)
             pdfx = bij.mapf(pdf)
@@ -902,6 +902,7 @@ def test_normal(self):
             R,
             {"mu": R, "sigma": Rplus},
             lambda value, mu, sigma: sp.norm.logcdf(value, mu, sigma),
+            decimal=select_by_precision(float64=6, float32=2),
         )
 
     def test_truncated_normal(self):
@@ -941,25 +942,6 @@ def test_chi_squared(self):
             lambda value, nu: sp.chi2.logpdf(value, df=nu),
         )
 
-    @pytest.mark.xfail(
-        condition=(aesara.config.floatX == "float32"),
-        reason="Poor CDF in SciPy. See scipy/scipy#869 for details.",
-    )
-    def test_wald_scipy(self):
-        self.check_logp(
-            Wald,
-            Rplus,
-            {"mu": Rplus, "alpha": Rplus},
-            lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha),
-            decimal=select_by_precision(float64=6, float32=1),
-        )
-        self.check_logcdf(
-            Wald,
-            Rplus,
-            {"mu": Rplus, "alpha": Rplus},
-            lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha),
-        )
-
     @pytest.mark.parametrize(
         "value,mu,lam,phi,alpha,logp",
         [
@@ -979,7 +961,7 @@ def test_wald_scipy(self):
             (50.0, 15.0, None, 0.666666, 10.0, -5.6481874),
         ],
     )
-    def test_wald(self, value, mu, lam, phi, alpha, logp):
+    def test_wald_logp_custom_points(self, value, mu, lam, phi, alpha, logp):
         # Log probabilities calculated using the dIG function from the R package gamlss.
         # See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or
         # http://www.gamlss.org/.
@@ -989,6 +971,27 @@ def test_wald(self, value, mu, lam, phi, alpha, logp):
         decimals = select_by_precision(float64=6, float32=1)
         assert_almost_equal(model.fastlogp(pt), logp, decimal=decimals, err_msg=str(pt))
 
+    def test_wald_logp(self):
+        self.check_logp(
+            Wald,
+            Rplus,
+            {"mu": Rplus, "alpha": Rplus},
+            lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha),
+            decimal=select_by_precision(float64=6, float32=1),
+        )
+
+    @pytest.mark.xfail(
+        condition=(aesara.config.floatX == "float32"),
+        reason="Poor CDF in SciPy. See scipy/scipy#869 for details.",
+    )
+    def test_wald_logcdf(self):
+        self.check_logcdf(
+            Wald,
+            Rplus,
+            {"mu": Rplus, "alpha": Rplus},
+            lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha),
+        )
+
     def test_beta(self):
         self.check_logp(
             Beta,
@@ -1256,11 +1259,7 @@ def test_gamma_logcdf(self):
             skip_paramdomain_outside_edge_test=True,
         )
 
-    @pytest.mark.xfail(
-        condition=(aesara.config.floatX == "float32"),
-        reason="Fails on float32 due to numerical issues",
-    )
-    def test_inverse_gamma(self):
+    def test_inverse_gamma_logp(self):
         self.check_logp(
             InverseGamma,
             Rplus,
@@ -1269,6 +1268,14 @@ def test_inverse_gamma(self):
         )
         # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set
         # True to avoid triggering a C-level assertion in the Aesara GammaQ function
+
+    @pytest.mark.xfail(
+        condition=(aesara.config.floatX == "float32"),
+        reason="Fails on float32 due to numerical issues",
+    )
+    def test_inverse_gamma_logcdf(self):
+        # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set
+        # True to avoid triggering a C-level assertion in the Aesara GammaQ function
         # in gamma.c file. Can be set back to False (default) once that issue is solved
         self.check_logcdf(
             InverseGamma,
@@ -1313,13 +1320,19 @@ def test_pareto(self):
         condition=(aesara.config.floatX == "float32"),
         reason="Fails on float32 due to inf issues",
     )
-    def test_weibull(self):
+    def test_weibull_logp(self):
         self.check_logp(
             Weibull,
             Rplus,
             {"alpha": Rplusbig, "beta": Rplusbig},
             lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta),
         )
+
+    @pytest.mark.xfail(
+        condition=(aesara.config.floatX == "float32"),
+        reason="Fails on float32 due to inf issues",
+    )
+    def test_weibull_logcdf(self):
         self.check_logcdf(
             Weibull,
             Rplus,
@@ -1368,27 +1381,37 @@ def test_binomial(self):
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
     @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
-    @pytest.mark.xfail(
-        condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"
-    )
-    def test_beta_binomial(self):
+    def test_beta_binomial_distribution(self):
         self.checkd(
             BetaBinomial,
             Nat,
             {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
         )
+
+    @pytest.mark.skipif(
+        condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"
+    )
+    def test_beta_binomial_logp(self):
         self.check_logp(
             BetaBinomial,
             Nat,
             {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
             lambda value, alpha, beta, n: sp.betabinom.logpmf(value, a=alpha, b=beta, n=n),
         )
+
+    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
+    @pytest.mark.skipif(
+        condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"
+    )
+    def test_beta_binomial_logcdf(self):
         self.check_logcdf(
             BetaBinomial,
             Nat,
             {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
             lambda value, alpha, beta, n: sp.betabinom.logcdf(value, a=alpha, b=beta, n=n),
         )
+
+    def test_beta_binomial_selfconsistency(self):
         self.check_selfconsistency_discrete_logcdf(
             BetaBinomial,
             Nat,
@@ -1475,27 +1498,37 @@ def test_constantdist(self):
         self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value))
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
-    def test_zeroinflatedpoisson(self):
+    @pytest.mark.xfail(
+        condition=(aesara.config.floatX == "float32"),
+        reason="Fails on float32 due to inf issues",
+    )
+    def test_zeroinflatedpoisson_distribution(self):
         self.checkd(
             ZeroInflatedPoisson,
             Nat,
             {"theta": Rplus, "psi": Unit},
         )
+
+    def test_zeroinflatedpoisson_logcdf(self):
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedPoisson,
             Nat,
             {"theta": Rplus, "psi": Unit},
         )
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
-    def test_zeroinflatednegativebinomial(self):
+    @pytest.mark.xfail(
+        condition=(aesara.config.floatX == "float32"),
+        reason="Fails on float32 due to inf issues",
+    )
+    def test_zeroinflatednegativebinomial_distribution(self):
         self.checkd(
             ZeroInflatedNegativeBinomial,
             Nat,
             {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit},
         )
+
+    def test_zeroinflatednegativebinomial_logcdf(self):
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedNegativeBinomial,
             Nat,
@@ -1504,13 +1537,14 @@ def test_zeroinflatednegativebinomial(self):
         )
 
     # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
-    def test_zeroinflatedbinomial(self):
+    def test_zeroinflatedbinomial_distribution(self):
         self.checkd(
             ZeroInflatedBinomial,
             Nat,
             {"n": NatSmall, "p": Unit, "psi": Unit},
         )
+
+    def test_zeroinflatedbinomial_logcdf(self):
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedBinomial,
             Nat,
@@ -2279,14 +2313,21 @@ def test_rice(self):
             lambda value, b, sigma: sp.rice.logpdf(value, b=b, loc=0, scale=sigma),
         )
 
-    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
-    def test_moyal(self):
+    def test_moyal_logp(self):
+        # Using a custom domain, because the standard `R` domain undeflows with scipy in float64
+        value_domain = Domain([-inf, -1.5, -1, -0.01, 0.0, 0.01, 1, 1.5, inf])
         self.check_logp(
             Moyal,
-            R,
+            value_domain,
             {"mu": R, "sigma": Rplusbig},
             lambda value, mu, sigma: floatX(sp.moyal.logpdf(value, mu, sigma)),
         )
+
+    @pytest.mark.xfail(
+        condition=(aesara.config.floatX == "float32"),
+        reason="Pymc3 underflows earlier than scipy on float32",
+    )
+    def test_moyal_logcdf(self):
         self.check_logcdf(
             Moyal,
             R,
diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py
@@ -192,8 +192,8 @@ def build_disaster_model(masked=False):
     return model
 
 
-@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
 class TestDisasterModel(SeededTest):
+    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
     # Time series of recorded coal mining disasters in the UK from 1851 to 1962
     def test_disaster_model(self):
         model = build_disaster_model(masked=False)
@@ -205,6 +205,7 @@ def test_disaster_model(self):
             tr = pm.sample(500, tune=50, start=start, step=step, chains=2)
             az.summary(tr)
 
+    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
     def test_disaster_model_missing(self):
         model = build_disaster_model(masked=True)
         with model:
diff --git a/pymc3/tests/test_models_utils.py b/pymc3/tests/test_models_utils.py
@@ -40,7 +40,6 @@ def test_pandas_init(self):
         m, l = utils.any_to_tensor_and_labels(self.data, labels=["x2", "x3"])
         self.assertMatrixLabels(m, l, lt=["x2", "x3"])
 
-    @pytest.mark.xfail
     def test_dict_input(self):
         m, l = utils.any_to_tensor_and_labels(self.data.to_dict("dict"))
         self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l)
@@ -51,6 +50,8 @@ def test_dict_input(self):
         m, l = utils.any_to_tensor_and_labels(self.data.to_dict("list"))
         self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l)
 
+    @pytest.mark.xfail
+    def test_dict_input_pandas_series(self):
         inp = {k: aet.as_tensor_variable(v.values) for k, v in self.data.to_dict("series").items()}
         m, l = utils.any_to_tensor_and_labels(inp)
         self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l)
diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py
@@ -957,8 +957,8 @@ def test_custom_proposal_dist(self):
         pass
 
 
-@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
 class TestNutsCheckTrace:
+    @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
     def test_multiple_samplers(self, caplog):
         with Model():
             prob = Beta("prob", alpha=5.0, beta=3.0)
diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py
@@ -235,15 +235,22 @@ def test_ordered():
     close_to_logical(np.diff(vals) >= 0, True, tol)
 
 
+def test_chain_values():
+    chain_tranf = tr.Chain([tr.logodds, tr.ordered])
+    vals = get_values(chain_tranf, Vector(R, 5), aet.dvector, np.zeros(5))
+    close_to_logical(np.diff(vals) >= 0, True, tol)
+
+
 @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
-def test_chain():
+def test_chain_vector_transform():
     chain_tranf = tr.Chain([tr.logodds, tr.ordered])
     check_vector_transform(chain_tranf, UnitSortedVector(3))
 
-    check_jacobian_det(chain_tranf, Vector(R, 4), aet.dvector, np.zeros(4), elemwise=False)
 
-    vals = get_values(chain_tranf, Vector(R, 5), aet.dvector, np.zeros(5))
-    close_to_logical(np.diff(vals) >= 0, True, tol)
+@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
+def test_chain_jacob_det():
+    chain_tranf = tr.Chain([tr.logodds, tr.ordered])
+    check_jacobian_det(chain_tranf, Vector(R, 4), aet.dvector, np.zeros(4), elemwise=False)
 
 
 class TestElementWiseLogp(SeededTest):