
Commit de91752

Revert "Changed Categorical to work with multidim p at the logp level (#3383)"
This reverts commit 8d9d3d8.
1 parent 8d9d3d8 commit de91752

File tree: 5 files changed, +12 -69 lines changed

RELEASE-NOTES.md

Lines changed: 0 additions & 3 deletions

@@ -23,9 +23,6 @@
 - Fixed incorrect usage of `broadcast_distribution_samples` in `DiscreteWeibull`.
 - `Mixture`'s default dtype is now determined by `theano.config.floatX`.
 - `dist_math.random_choice` now handles nd-arrays of category probabilities, and also handles sizes that are not `None`. Also removed unused `k` kwarg from `dist_math.random_choice`.
-- Changed `Categorical.mode` to preserve all the dimensions of `p` except the last one, which encodes each category's probability.
-- Changed initialization of `Categorical.p`. `p` is now normalized to sum to `1` inside `logp` and `random`, but not during initialization. This could hide negative values supplied to `p` as mentioned in #2082.
-- To be able to test for negative `p` values supplied to `Categorical`, `Categorical.logp` was changed to check for `sum(self.p, axis=-1) == 1` only if `self.p` is not a `Number`, `np.ndarray`, `TensorConstant` or `SharedVariable`. These cases are automatically normalized to sum to `1`. The other condition may originate from a `step_method` proposal, where the value of the `self.p` tensor may be set but must still sum to 1. This may break old code which initialized `p` with a theano expression and relied on the default normalization to get it to sum to 1. `Categorical.logp` now also checks that the used `p` has values lower than 1.

 ### Deprecations

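Note: the three deleted entries above document the safeguard that this revert removes. With normalization moved back into `Categorical.__init__`, negative entries in `p` can be silently rescaled into an apparently valid probability vector. A minimal NumPy sketch of the #2082 edge case (illustrative only, not code from this commit):

import numpy as np

# Edge case from issue #2082: every nonzero entry of p is negative,
# yet early normalization produces what looks like a valid simplex.
p = np.array([-1.0, -1.0, 0.0, 0.0])
p_norm = p / p.sum(axis=-1, keepdims=True)
print(p_norm)        # [ 0.5  0.5 -0.  -0. ]
print(p_norm.sum())  # 1.0 -- the negative inputs are hidden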

pymc3/distributions/discrete.py

Lines changed: 8 additions & 27 deletions

@@ -1,4 +1,3 @@
-import numbers
 import numpy as np
 import theano
 import theano.tensor as tt
@@ -711,17 +710,11 @@ def __init__(self, p, *args, **kwargs):
         except AttributeError:
             self.k = tt.shape(p)[-1]
         p = tt.as_tensor_variable(floatX(p))
-
-        # From #2082, it may be dangerous to automatically rescale p at this
-        # point without checking for positiveness
-        self.p = p
-        self.mode = tt.argmax(p, axis=-1)
-        if self.mode.ndim == 1:
-            self.mode = tt.squeeze(self.mode)
+        self.p = (p.T / tt.sum(p, -1)).T
+        self.mode = tt.argmax(p)

     def random(self, point=None, size=None):
         p, k = draw_values([self.p, self.k], point=point, size=size)
-        p = p / np.sum(p, axis=-1, keepdims=True)

         return generate_samples(random_choice,
                                 p=p,
@@ -730,33 +723,21 @@ def random(self, point=None, size=None):
                                 size=size)

     def logp(self, value):
-        p_ = self.p
+        p = self.p
         k = self.k

         # Clip values before using them for indexing
         value_clip = tt.clip(value, 0, k - 1)

-        # We must only check that the values sum to 1 if p comes from a
-        # tensor variable, i.e. when p is a step_method proposal. In the other
-        # cases we normalize ourselves
-        if not isinstance(p_, (numbers.Number,
-                               np.ndarray,
-                               tt.TensorConstant,
-                               tt.sharedvar.SharedVariable)):
-            sumto1 = theano.gradient.zero_grad(
-                tt.le(abs(tt.sum(p_, axis=-1) - 1), 1e-5))
-            p = p_
-        else:
-            p = p_ / tt.sum(p_, axis=-1, keepdims=True)
-            sumto1 = True
+        sumto1 = theano.gradient.zero_grad(
+            tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5))

         if p.ndim > 1:
-            a = tt.log(np.moveaxis(p, -1, 0)[value_clip])
+            a = tt.log(p[tt.arange(p.shape[0]), value_clip])
         else:
             a = tt.log(p[value_clip])

-        return bound(a, value >= 0, value <= (k - 1), sumto1,
-                     tt.all(p_ > 0, axis=-1), tt.all(p <= 1, axis=-1))
+        return bound(a, value >= 0, value <= (k - 1), sumto1)

     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -1196,7 +1177,7 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
             tt.zeros_like(tt.shape_padright(pa[:, 0])),
             pa,
             tt.ones_like(tt.shape_padright(pa[:, 0]))
-        ], axis=-1)
+        ], axis=1)
         p = p_cum[:, 1:] - p_cum[:, :-1]

         super().__init__(p=p, *args, **kwargs)
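The behavioral core of the revert is in `logp`. The reverted code indexed batched `p` by first moving the category axis to the front (`np.moveaxis(p, -1, 0)[value_clip]`), which works for any number of leading batch dimensions; the restored code assumes `p` is exactly 2-D and picks one entry per row. A hedged NumPy sketch of the restored indexing (hypothetical values, not from the commit):

import numpy as np

# Hypothetical 2-D p: one probability row per observation.
p = np.array([[0.2, 0.8],
              [0.6, 0.4],
              [0.5, 0.5]])
value = np.array([1, 0, 1])  # observed category for each row

# Restored indexing: p[i, value[i]] for each row i -- valid only
# when p has a single batch dimension.
logp_terms = np.log(p[np.arange(p.shape[0]), value])
print(logp_terms)  # log([0.8, 0.6, 0.5])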

pymc3/tests/test_distribution_defaults.py

Lines changed: 1 addition & 8 deletions

@@ -1,5 +1,5 @@
 from ..model import Model
-from ..distributions import DiscreteUniform, Continuous, Categorical
+from ..distributions import DiscreteUniform, Continuous

 import numpy as np
 import pytest
@@ -67,10 +67,3 @@ def test_discrete_uniform_negative():
     with model:
         x = DiscreteUniform('x', lower=-10, upper=0)
     assert model.test_point['x'] == -5
-
-
-def test_categorical_mode():
-    model = Model()
-    with model:
-        x = Categorical('x', p=np.eye(4), shape=4)
-    assert np.allclose(model.test_point['x'], np.arange(4))
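The deleted test depended on the multidimensional mode (`tt.argmax(p, axis=-1)`) from #3383; the revert restores the flat `tt.argmax(p)`. A small NumPy illustration of the difference (illustrative only):

import numpy as np

p = np.eye(4)                 # row i puts all mass on category i
print(np.argmax(p, axis=-1))  # [0 1 2 3] -- per-row mode (reverted #3383 behavior)
print(np.argmax(p))           # 0         -- flat argmax restored by this commit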

pymc3/tests/test_distributions.py

Lines changed: 3 additions & 28 deletions

@@ -321,7 +321,7 @@ def dirichlet_logpdf(value, a):

 def categorical_logpdf(value, p):
     if value >= 0 and value <= len(p):
-        return floatX(np.log(np.moveaxis(p, -1, 0)[value]))
+        return floatX(np.log(p[value]))
     else:
         return -inf

@@ -346,10 +346,8 @@ def invlogit(x, eps=sys.float_info.epsilon):

 def orderedlogistic_logpdf(value, eta, cutpoints):
     c = np.concatenate(([-np.inf], cutpoints, [np.inf]))
-    ps = np.array([invlogit(eta - cc) - invlogit(eta - cc1)
-                   for cc, cc1 in zip(c[:-1], c[1:])])
-    p = ps[value]
-    return np.where(np.all(ps > 0), np.log(p), -np.inf)
+    p = invlogit(eta - c[value]) - invlogit(eta - c[value + 1])
+    return np.log(p)

 class Simplex:
     def __init__(self, n):
@@ -1081,29 +1079,6 @@ def test_categorical_bounds(self):
         assert np.isinf(x.logp({'x': -1}))
         assert np.isinf(x.logp({'x': 3}))

-    def test_categorical_valid_p(self):
-        with Model():
-            x = Categorical('x', p=np.array([-0.2, 0.3, 0.5]))
-            assert np.isinf(x.logp({'x': 0}))
-            assert np.isinf(x.logp({'x': 1}))
-            assert np.isinf(x.logp({'x': 2}))
-        with Model():
-            # A model where p sums to 1 but contains negative values
-            x = Categorical('x', p=np.array([-0.2, 0.7, 0.5]))
-            assert np.isinf(x.logp({'x': 0}))
-            assert np.isinf(x.logp({'x': 1}))
-            assert np.isinf(x.logp({'x': 2}))
-        with Model():
-            # Hard edge case from #2082
-            # Early automatic normalization of p's sum would hide the negative
-            # entries if there is a single or pair number of negative values
-            # and the rest are zero
-            x = Categorical('x', p=np.array([-1, -1, 0, 0]))
-            assert np.isinf(x.logp({'x': 0}))
-            assert np.isinf(x.logp({'x': 1}))
-            assert np.isinf(x.logp({'x': 2}))
-            assert np.isinf(x.logp({'x': 3}))
-
     @pytest.mark.parametrize('n', [2, 3, 4])
     def test_categorical(self, n):
         self.pymc3_matches_scipy(Categorical, Domain(range(n), 'int64'), {'p': Simplex(n)},
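The restored `orderedlogistic_logpdf` reads a single category's probability directly off the two cutpoints that bracket it, instead of materializing the full vector `ps` (and it drops the `np.all(ps > 0)` guard against unordered cutpoints). A quick NumPy check of the identity it relies on, with hypothetical numbers:

import numpy as np

def invlogit(x):
    return 1.0 / (1.0 + np.exp(-x))

eta, value = 0.3, 2
cutpoints = np.array([-1.0, 0.5, 2.0])  # must be increasing
c = np.concatenate(([-np.inf], cutpoints, [np.inf]))

# Full probability vector, as the deleted version built it:
ps = np.array([invlogit(eta - lo) - invlogit(eta - hi)
               for lo, hi in zip(c[:-1], c[1:])])

# The restored one-liner evaluates only the requested entry:
p = invlogit(eta - c[value]) - invlogit(eta - c[value + 1])
assert np.isclose(p, ps[value])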

pymc3/tests/test_distributions_random.py

Lines changed: 0 additions & 3 deletions

@@ -444,9 +444,6 @@ def test_probability_vector_shape(self):
         p = np.ones((10, 5))
         assert pm.Categorical.dist(p=p).random().shape == (10,)
         assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 10)
-        p = np.ones((3, 7, 5))
-        assert pm.Categorical.dist(p=p).random().shape == (3, 7)
-        assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 3, 7)


 class TestScalarParameterSamples(SeededTest):
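The deleted assertions exercised 3-D `p`; after the revert, `Categorical.random` is only tested with a 2-D batch of probability vectors. A hedged NumPy sketch of the surviving shape contract (illustrative, not the pymc3 implementation):

import numpy as np

rng = np.random.default_rng(0)
p = np.ones((10, 5)) / 5  # 10 batch rows, 5 categories each

# One categorical draw per row, mirroring the kept assertion that
# pm.Categorical.dist(p=p).random().shape == (10,):
draws = np.array([rng.choice(p.shape[-1], p=row) for row in p])
print(draws.shape)  # (10,)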
