
Commit e47c4c0

Revert "Add numerically safe ordered probit distribution. (#4232)"
This reverts commit 9dbf9ea.
1 parent 9dbf9ea commit e47c4c0

File tree (5 files changed: +1, -197 lines)

- RELEASE-NOTES.md
- pymc3/distributions/__init__.py
- pymc3/distributions/discrete.py
- pymc3/distributions/dist_math.py
- pymc3/tests/test_distributions.py

RELEASE-NOTES.md

Lines changed: 0 additions & 2 deletions
@@ -9,8 +9,6 @@ This is the first release to support Python3.9 and to drop Python3.6.
 - Make `sample_shape` same across all contexts in `draw_values` (see [#4305](https://github.com/pymc-devs/pymc3/pull/4305)).
 - Removed `theanof.set_theano_config` because it illegally touched Theano's privates (see [#4329](https://github.com/pymc-devs/pymc3/pull/4329)).
 
-### New Features
-- `OrderedProbit` distribution added (see [#4232](https://github.com/pymc-devs/pymc3/pull/4232)).
 
 ## PyMC3 3.10.0 (7 December 2020)

pymc3/distributions/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -61,7 +61,6 @@
     HyperGeometric,
     NegativeBinomial,
     OrderedLogistic,
-    OrderedProbit,
     Poisson,
     ZeroInflatedBinomial,
     ZeroInflatedNegativeBinomial,
@@ -141,7 +140,6 @@
     "HyperGeometric",
     "Categorical",
     "OrderedLogistic",
-    "OrderedProbit",
     "DensityDist",
     "Distribution",
     "Continuous",

pymc3/distributions/discrete.py

Lines changed: 0 additions & 131 deletions
@@ -24,10 +24,7 @@
     binomln,
     bound,
     factln,
-    log_diff_normal_cdf,
     logpow,
-    normal_lccdf,
-    normal_lcdf,
     random_choice,
 )
 from pymc3.distributions.distribution import Discrete, draw_values, generate_samples
@@ -1641,131 +1638,3 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
         p = p_cum[..., 1:] - p_cum[..., :-1]
 
         super().__init__(p=p, *args, **kwargs)
-
-
-class OrderedProbit(Categorical):
-    R"""
-    Ordered Probit log-likelihood.
-
-    Useful for regression on ordinal data values whose values range
-    from 1 to K as a function of some predictor, :math:`\eta`. The
-    cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
-    mapped to which of the K observed dependent variables. The number
-    of cutpoints is K - 1. It is recommended that the cutpoints are
-    constrained to be ordered.
-
-    In order to stabilize the computation, the log-likelihood is computed
-    in log space using the scaled error function `erfcx`.
-
-    .. math::
-
-       f(k \mid \eta, c) = \left\{
-         \begin{array}{l}
-           1 - \text{normal_cdf}(0, \sigma, \eta - c_1)
-             \,, \text{if } k = 0 \\
-           \text{normal_cdf}(0, \sigma, \eta - c_{k - 1}) -
-           \text{normal_cdf}(0, \sigma, \eta - c_{k})
-             \,, \text{if } 0 < k < K \\
-           \text{normal_cdf}(0, \sigma, \eta - c_{K - 1})
-             \,, \text{if } k = K \\
-         \end{array}
-       \right.
-
-    Parameters
-    ----------
-    eta: float
-        The predictor.
-    c: array
-        The length K - 1 array of cutpoints which break :math:`\eta` into
-        ranges. Do not explicitly set the first and last elements of
-        :math:`c` to negative and positive infinity.
-    sigma: float
-        The standard deviation of the probit function.
-
-    Example
-    -------
-    .. code:: python
-
-        # Generate data for a simple 1 dimensional example problem
-        n1_c = 300; n2_c = 300; n3_c = 300
-        cluster1 = np.random.randn(n1_c) + -1
-        cluster2 = np.random.randn(n2_c) + 0
-        cluster3 = np.random.randn(n3_c) + 2
-
-        x = np.concatenate((cluster1, cluster2, cluster3))
-        y = np.concatenate((1*np.ones(n1_c),
-                            2*np.ones(n2_c),
-                            3*np.ones(n3_c))) - 1
-
-        # Ordered probit regression
-        with pm.Model() as model:
-            cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
-                                  transform=pm.distributions.transforms.ordered)
-            y_ = pm.OrderedProbit("y", cutpoints=cutpoints, eta=x, observed=y)
-            tr = pm.sample(1000)
-
-        # Plot the results
-        plt.hist(cluster1, 30, alpha=0.5);
-        plt.hist(cluster2, 30, alpha=0.5);
-        plt.hist(cluster3, 30, alpha=0.5);
-        plt.hist(tr["cutpoints"][:,0], 80, alpha=0.2, color='k');
-        plt.hist(tr["cutpoints"][:,1], 80, alpha=0.2, color='k');
-
-    """
-
-    def __init__(self, eta, cutpoints, *args, **kwargs):
-
-        self.eta = tt.as_tensor_variable(floatX(eta))
-        self.cutpoints = tt.as_tensor_variable(cutpoints)
-
-        probits = tt.shape_padright(self.eta) - self.cutpoints
-        _log_p = tt.concatenate(
-            [
-                tt.shape_padright(normal_lccdf(0, 1, probits[..., 0])),
-                log_diff_normal_cdf(0, 1, probits[..., :-1], probits[..., 1:]),
-                tt.shape_padright(normal_lcdf(0, 1, probits[..., -1])),
-            ],
-            axis=-1,
-        )
-        _log_p = tt.as_tensor_variable(floatX(_log_p))
-
-        self._log_p = _log_p
-        self.mode = tt.argmax(_log_p, axis=-1)
-        p = tt.exp(_log_p)
-
-        super().__init__(p=p, *args, **kwargs)
-
-    def logp(self, value):
-        r"""
-        Calculate log-probability of Ordered Probit distribution at specified value.
-
-        Parameters
-        ----------
-        value: numeric
-            Value(s) for which log-probability is calculated. If the log probabilities for multiple
-            values are desired, the values must be provided in a numpy array or theano tensor.
-
-        Returns
-        -------
-        TensorVariable
-        """
-        logp = self._log_p
-        k = self.k
-
-        # Clip values before using them for indexing
-        value_clip = tt.clip(value, 0, k - 1)
-
-        if logp.ndim > 1:
-            if logp.ndim > value_clip.ndim:
-                value_clip = tt.shape_padleft(value_clip, logp.ndim - value_clip.ndim)
-            elif logp.ndim < value_clip.ndim:
-                logp = tt.shape_padleft(logp, value_clip.ndim - logp.ndim)
-            pattern = (logp.ndim - 1,) + tuple(range(logp.ndim - 1))
-            a = take_along_axis(
-                logp.dimshuffle(pattern),
-                value_clip,
-            )
-        else:
-            a = logp[value_clip]
-
-        return bound(a, value >= 0, value <= (k - 1))
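For readers skimming the reverted class, the probabilities it builds are simply differences of normal CDFs evaluated at `eta - cutpoints`. Below is a minimal NumPy/SciPy sketch of that mapping; it is illustrative only (the function name `ordered_probit_probs` is not part of PyMC3) and uses the naive CDF difference, i.e. without the log-space `erfcx` stabilization that the reverted code provides.

.. code:: python

    # Illustrative sketch: category probabilities implied by an ordered probit
    # with predictor eta and K - 1 ordered cutpoints (naive, non-stabilized form).
    import numpy as np
    from scipy.stats import norm

    def ordered_probit_probs(eta, cutpoints):
        c = np.concatenate(([-np.inf], np.asarray(cutpoints, dtype=float), [np.inf]))
        # P(k) = Phi(c_{k+1} - eta) - Phi(c_k - eta) for k = 0, ..., K - 1
        return np.diff(norm.cdf(c - eta))

    probs = ordered_probit_probs(eta=0.3, cutpoints=[-1.0, 1.0])
    print(probs, probs.sum())  # three non-negative probabilities summing to 1.0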

pymc3/distributions/dist_math.py

Lines changed: 0 additions & 40 deletions
@@ -134,46 +134,6 @@ def normal_lccdf(mu, sigma, x):
     )
 
 
-def log_diff_normal_cdf(mu, sigma, x, y):
-    """
-    Compute :math:`\\log(\\Phi(\\frac{x - \\mu}{\\sigma}) - \\Phi(\\frac{y - \\mu}{\\sigma}))` safely in log space.
-
-    Parameters
-    ----------
-    mu: float
-        mean
-    sigma: float
-        std
-
-    x: float
-
-    y: float
-        must be strictly less than x.
-
-    Returns
-    -------
-    log (\\Phi(x) - \\Phi(y))
-
-    """
-    x = (x - mu) / sigma / tt.sqrt(2.0)
-    y = (y - mu) / sigma / tt.sqrt(2.0)
-
-    # To stabilize the computation, consider these three regions:
-    # 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x) and erf(y) = 1 - e^{-y^2} erfcx(y)
-    # 2) 0 > x > y => Use erf(x) = e^{-x^2} erfcx(-x) - 1 and erf(y) = e^{-y^2} erfcx(-y) - 1
-    # 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine.
-    return tt.log(0.5) + tt.switch(
-        tt.gt(y, 0),
-        -tt.square(y) + tt.log(tt.erfcx(y) - tt.exp(tt.square(y) - tt.square(x)) * tt.erfcx(x)),
-        tt.switch(
-            tt.lt(x, 0),  # 0 > x > y
-            -tt.square(x)
-            + tt.log(tt.erfcx(-x) - tt.exp(tt.square(x) - tt.square(y)) * tt.erfcx(-y)),
-            tt.log(tt.erf(x) - tt.erf(y)),  # x > 0 > y
-        ),
-    )
-
-
 def sigma2rho(sigma):
     """
     `sigma -> rho` theano converter
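The three-region `erfcx` trick above can be checked outside Theano. The sketch below is illustrative only (scalar inputs, assuming x > y); it mirrors the same branches in plain NumPy/SciPy and shows the case the stabilization exists for: far in a tail, the naive difference of CDFs underflows to zero while the `erfcx` form stays finite.

.. code:: python

    # Illustrative sketch: the same three-region erfcx stabilization in NumPy/SciPy,
    # for scalar x > y.
    import numpy as np
    from scipy.special import erf, erfcx
    from scipy.stats import norm

    def log_diff_normal_cdf(mu, sigma, x, y):
        x = (x - mu) / sigma / np.sqrt(2.0)
        y = (y - mu) / sigma / np.sqrt(2.0)
        if y > 0:  # both points in the upper tail
            return np.log(0.5) - y ** 2 + np.log(erfcx(y) - np.exp(y ** 2 - x ** 2) * erfcx(x))
        if x < 0:  # both points in the lower tail
            return np.log(0.5) - x ** 2 + np.log(erfcx(-x) - np.exp(x ** 2 - y ** 2) * erfcx(-y))
        return np.log(0.5) + np.log(erf(x) - erf(y))  # points straddle zero

    print(np.log(norm.cdf(21.0) - norm.cdf(20.0)))    # -inf: the naive difference underflows
    print(log_diff_normal_cdf(0.0, 1.0, 21.0, 20.0))  # finite, about -203.9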

pymc3/tests/test_distributions.py

Lines changed: 1 addition & 22 deletions
@@ -26,7 +26,7 @@
 from numpy import array, exp, inf, log
 from numpy.testing import assert_allclose, assert_almost_equal, assert_equal
 from scipy import integrate
-from scipy.special import erf, logit
+from scipy.special import logit
 
 import pymc3 as pm
 
@@ -74,7 +74,6 @@
     NegativeBinomial,
     Normal,
     OrderedLogistic,
-    OrderedProbit,
     Pareto,
     Poisson,
     Rice,
@@ -432,17 +431,6 @@ def orderedlogistic_logpdf(value, eta, cutpoints):
     return np.where(np.all(ps >= 0), np.log(p), -np.inf)
 
 
-def invprobit(x):
-    return (erf(x / np.sqrt(2)) + 1) / 2
-
-
-def orderedprobit_logpdf(value, eta, cutpoints):
-    c = np.concatenate(([-np.inf], cutpoints, [np.inf]))
-    ps = np.array([invprobit(eta - cc) - invprobit(eta - cc1) for cc, cc1 in zip(c[:-1], c[1:])])
-    p = ps[value]
-    return np.where(np.all(ps >= 0), np.log(p), -np.inf)
-
-
 class Simplex:
     def __init__(self, n):
         self.vals = list(simplex_values(n))
@@ -1548,15 +1536,6 @@ def test_orderedlogistic(self, n):
             lambda value, eta, cutpoints: orderedlogistic_logpdf(value, eta, cutpoints),
         )
 
-    @pytest.mark.parametrize("n", [2, 3, 4])
-    def test_orderedprobit(self, n):
-        self.pymc3_matches_scipy(
-            OrderedProbit,
-            Domain(range(n), "int64"),
-            {"eta": Runif, "cutpoints": UnitSortedVector(n - 1)},
-            lambda value, eta, cutpoints: orderedprobit_logpdf(value, eta, cutpoints),
-        )
-
     def test_densitydist(self):
         def logp(x):
             return -log(2 * 0.5) - abs(x - 0.5) / 0.5
