Fix Categorical.logp with take_along_axis (#3572)

lucianopaz · aloctavodia · commit 0d5203c43198 · 2019-08-17T12:09:09.000-03:00
* Added failing tests

* Fixed Categorical.logp using take_along_axis. Added tests for the theanof

* Added release notes

* Account for ndim mismatch in Categorical logp
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -13,6 +13,8 @@
 ### Maintenance
 - Moved math operations out of `Rice`, `TruncatedNormal`, `Triangular` and `ZeroInflatedNegativeBinomial` `random` methods. Math operations on values returned by `draw_values` might not broadcast well, and all the `size` aware broadcasting is left to `generate_samples`. Fixes [#3481](https://github.com/pymc-devs/pymc3/issues/3481) and [#3508](https://github.com/pymc-devs/pymc3/issues/3508)
 - Parallelization of population steppers (`DEMetropolis`) is now set via the `cores` argument. ([#3559](https://github.com/pymc-devs/pymc3/pull/3559))
+- Fixed a bug in `Categorical.logp`. In the case of multidimensional `p`'s, the indexing was done wrong leading to incorrectly shaped tensors that consumed `O(n**2)` memory instead of `O(n)`. This fixes issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535)
+- Fixed a defect in `OrderedLogistic.__init__` that unnecessarily increased the dimensionality of the underlying `p`. Related to issue issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535) but was not the true cause of it.
 - SMC: stabilize covariance matrix [3573](https://github.com/pymc-devs/pymc3/pull/3573)
 - SMC is no longer a step method of `pm.sample` now it should be called using `pm.sample_smc` [3579](https://github.com/pymc-devs/pymc3/pull/3579)
 - Now uses `multiprocessong` rather than `psutil` to count CPUs, which results in reliable core counts on Chromebooks.
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -8,7 +8,7 @@
 from .distribution import Discrete, draw_values, generate_samples
 from .shape_utils import broadcast_distribution_samples
 from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
-from ..theanof import floatX, intX
+from ..theanof import floatX, intX, take_along_axis
 
 
 __all__ = ['Binomial',  'BetaBinomial',  'Bernoulli',  'DiscreteWeibull',
@@ -997,8 +997,21 @@ def logp(self, value):
         p = p_ / tt.sum(p_, axis=-1, keepdims=True)
 
         if p.ndim > 1:
+            if p.ndim > value_clip.ndim:
+                value_clip = tt.shape_padleft(
+                    value_clip, p_.ndim - value_clip.ndim
+                )
+            elif p.ndim < value_clip.ndim:
+                p = tt.shape_padleft(
+                    p, value_clip.ndim - p_.ndim
+                )
             pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
-            a = tt.log(p.dimshuffle(pattern)[value_clip])
+            a = tt.log(
+                take_along_axis(
+                    p.dimshuffle(pattern),
+                    value_clip,
+                )
+            )
         else:
             a = tt.log(p[value_clip])
 
@@ -1571,13 +1584,13 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
         self.eta = tt.as_tensor_variable(floatX(eta))
         self.cutpoints = tt.as_tensor_variable(cutpoints)
 
-        pa = sigmoid(tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta))
+        pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta))
         p_cum = tt.concatenate([
-            tt.zeros_like(tt.shape_padright(pa[:, 0])),
+            tt.zeros_like(tt.shape_padright(pa[..., 0])),
             pa,
-            tt.ones_like(tt.shape_padright(pa[:, 0]))
+            tt.ones_like(tt.shape_padright(pa[..., 0]))
         ], axis=-1)
-        p = p_cum[:, 1:] - p_cum[:, :-1]
+        p = p_cum[..., 1:] - p_cum[..., :-1]
 
         super().__init__(p=p, *args, **kwargs)
 
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -1335,3 +1335,35 @@ def test_discrete_trafo():
         with pytest.raises(ValueError) as err:
             Binomial('a', n=5, p=0.5, transform='log')
         err.match('Transformations for discrete distributions')
+
+
+@pytest.mark.parametrize("shape", [tuple(), (1,), (3, 1), (3, 2)], ids=str)
+def test_orderedlogistic_dimensions(shape):
+    # Test for issue #3535
+    loge = np.log10(np.exp(1))
+    size = 7
+    p = np.ones(shape + (10,)) / 10
+    cutpoints = np.tile(logit(np.linspace(0, 1, 11)[1:-1]), shape + (1,))
+    obs = np.random.randint(0, 1, size=(size,) + shape)
+    with Model():
+        ol = OrderedLogistic(
+            "ol",
+            eta=np.zeros(shape),
+            cutpoints=cutpoints,
+            shape=shape,
+            observed=obs
+        )
+        c = Categorical(
+            "c",
+            p=p,
+            shape=shape,
+            observed=obs
+        )
+    ologp = ol.logp({"ol": 1}) * loge
+    clogp = c.logp({"c": 1}) * loge
+    expected = -np.prod((size,) + shape)
+
+    assert c.distribution.p.ndim == (len(shape) + 1)
+    assert np.allclose(clogp, expected)
+    assert ol.distribution.p.ndim == (len(shape) + 1)
+    assert np.allclose(ologp, expected)
diff --git a/pymc3/tests/test_theanof.py b/pymc3/tests/test_theanof.py
@@ -1,9 +1,61 @@
 import collections
-
 import pytest
-from theano import theano
+from itertools import product
+
+from theano import theano, tensor as tt
+import numpy as np
+
+from pymc3.theanof import set_theano_conf, take_along_axis, _conversion_map
+from pymc3.vartypes import int_types
+
+
+FLOATX = str(theano.config.floatX)
+INTX = str(_conversion_map[FLOATX])
+
+
+def _make_along_axis_idx(arr_shape, indices, axis):
+    # compute dimensions to iterate over
+    if str(indices.dtype) not in int_types:
+        raise IndexError('`indices` must be an integer array')
+    shape_ones = (1,) * indices.ndim
+    dest_dims = list(range(axis)) + [None] + list(range(axis+1, indices.ndim))
 
-from pymc3.theanof import set_theano_conf
+    # build a fancy index, consisting of orthogonal aranges, with the
+    # requested index inserted at the right location
+    fancy_index = []
+    for dim, n in zip(dest_dims, arr_shape):
+        if dim is None:
+            fancy_index.append(indices)
+        else:
+            ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:]
+            fancy_index.append(np.arange(n).reshape(ind_shape))
+
+    return tuple(fancy_index)
+
+
+if hasattr(np, "take_along_axis"):
+    np_take_along_axis = np.take_along_axis
+else:
+    def np_take_along_axis(arr, indices, axis):
+        if arr.shape[axis] <= 32:
+            # We can safely test with numpy's choose
+            arr = np.moveaxis(arr, axis, 0)
+            indices = np.moveaxis(indices, axis, 0)
+            out = np.choose(indices, arr)
+            return np.moveaxis(out, 0, axis)
+        else:
+            # numpy's choose cannot handle such a large axis so we
+            # just use the implementation of take_along_axis. This is kind of
+            # cheating because our implementation is the same as the one below
+            if axis < 0:
+                _axis = arr.ndim + axis
+            else:
+                _axis = axis
+            if _axis < 0 or _axis >= arr.ndim:
+                raise ValueError(
+                    "Supplied axis {} is out of bounds".format(axis)
+                )
+            return arr[_make_along_axis_idx(arr.shape, indices, _axis)]
 
 
 class TestSetTheanoConfig:
@@ -26,3 +78,165 @@ def test_restore(self):
             assert conf == {'compute_test_value': 'off'}
             conf = set_theano_conf(conf)
             assert conf == {'compute_test_value': 'raise'}
+
+
+class TestTakeAlongAxis():
+    def setup_class(self):
+        self.inputs_buffer = dict()
+        self.output_buffer = dict()
+        self.func_buffer = dict()
+
+    def _input_tensors(self, shape):
+        ndim = len(shape)
+        arr = tt.TensorType(FLOATX, [False] * ndim)("arr")
+        indices = tt.TensorType(INTX, [False] * ndim)("indices")
+        arr.tag.test_value = np.zeros(shape, dtype=FLOATX)
+        indices.tag.test_value = np.zeros(shape, dtype=INTX)
+        return arr, indices
+
+    def get_input_tensors(self, shape):
+        ndim = len(shape)
+        try:
+            return self.inputs_buffer[ndim]
+        except KeyError:
+            arr, indices = self._input_tensors(shape)
+            self.inputs_buffer[ndim] = arr, indices
+            return arr, indices
+
+    def _output_tensor(self, arr, indices, axis):
+        return take_along_axis(arr, indices, axis)
+
+    def get_output_tensors(self, shape, axis):
+        ndim = len(shape)
+        try:
+            return self.output_buffer[(ndim, axis)]
+        except KeyError:
+            arr, indices = self.get_input_tensors(shape)
+            out = self._output_tensor(arr, indices, axis)
+            self.output_buffer[(ndim, axis)] = out
+            return out
+
+    def _function(self, arr, indices, out):
+        return theano.function([arr, indices], [out])
+
+    def get_function(self, shape, axis):
+        ndim = len(shape)
+        try:
+            return self.func_buffer[(ndim, axis)]
+        except KeyError:
+            arr, indices = self.get_input_tensors(shape)
+            out = self.get_output_tensors(shape, axis)
+            func = self._function(arr, indices, out)
+            self.func_buffer[(ndim, axis)] = func
+            return func
+
+    @staticmethod
+    def get_input_values(shape, axis, samples):
+        arr = np.random.randn(*shape).astype(FLOATX)
+        size = list(shape)
+        size[axis] = samples
+        size = tuple(size)
+        indices = np.random.randint(
+            low=0, high=shape[axis], size=size, dtype=INTX
+        )
+        return arr, indices
+
+    @pytest.mark.parametrize(
+        ["shape", "axis", "samples"],
+        product(
+            [
+                (1,),
+                (3,),
+                (3, 1),
+                (3, 2),
+                (1, 1),
+                (1, 2),
+                (40, 40),  # choose fails here
+                (5, 1, 1),
+                (5, 1, 2),
+                (5, 3, 1),
+                (5, 3, 2),
+            ],
+            [0, -1],
+            [1, 10],
+        ),
+        ids=str,
+    )
+    def test_take_along_axis(self, shape, axis, samples):
+        arr, indices = self.get_input_values(shape, axis, samples)
+        func = self.get_function(shape, axis)
+        assert np.allclose(
+            np_take_along_axis(arr, indices, axis=axis),
+            func(arr, indices)[0]
+        )
+
+    @pytest.mark.parametrize(
+        ["shape", "axis", "samples"],
+        product(
+            [
+                (1,),
+                (3,),
+                (3, 1),
+                (3, 2),
+                (1, 1),
+                (1, 2),
+                (40, 40),  # choose fails here
+                (5, 1, 1),
+                (5, 1, 2),
+                (5, 3, 1),
+                (5, 3, 2),
+            ],
+            [0, -1],
+            [1, 10],
+        ),
+        ids=str,
+    )
+    def test_take_along_axis_grad(self, shape, axis, samples):
+        if axis < 0:
+            _axis = len(shape) + axis
+        else:
+            _axis = axis
+        # Setup the theano function
+        t_arr, t_indices = self.get_input_tensors(shape)
+        t_out2 = theano.grad(
+            tt.sum(self._output_tensor(t_arr**2, t_indices, axis)),
+            t_arr,
+        )
+        func = theano.function([t_arr, t_indices], [t_out2])
+
+        # Test that the gradient gives the same output as what is expected
+        arr, indices = self.get_input_values(shape, axis, samples)
+        expected_grad = np.zeros_like(arr)
+        slicer = [slice(None)] * len(shape)
+        for i in range(indices.shape[axis]):
+            slicer[axis] = i
+            inds = indices[slicer].reshape(
+                shape[:_axis] + (1,) + shape[_axis + 1:]
+            )
+            inds = _make_along_axis_idx(shape, inds, _axis)
+            expected_grad[inds] += 1
+        expected_grad *= 2 * arr
+        out = func(arr, indices)[0]
+        assert np.allclose(out, expected_grad)
+
+    @pytest.mark.parametrize("axis", [-4, 4], ids=str)
+    def test_axis_failure(self, axis):
+        arr, indices = self.get_input_tensors((3, 1))
+        with pytest.raises(ValueError):
+            take_along_axis(arr, indices, axis=axis)
+
+    def test_ndim_failure(self):
+        arr = tt.TensorType(FLOATX, [False] * 3)("arr")
+        indices = tt.TensorType(INTX, [False] * 2)("indices")
+        arr.tag.test_value = np.zeros((1,) * arr.ndim, dtype=FLOATX)
+        indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=INTX)
+        with pytest.raises(ValueError):
+            take_along_axis(arr, indices)
+
+    def test_dtype_failure(self):
+        arr = tt.TensorType(FLOATX, [False] * 3)("arr")
+        indices = tt.TensorType(FLOATX, [False] * 3)("indices")
+        arr.tag.test_value = np.zeros((1,) * arr.ndim, dtype=FLOATX)
+        indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=FLOATX)
+        with pytest.raises(IndexError):
+            take_along_axis(arr, indices)
diff --git a/pymc3/theanof.py b/pymc3/theanof.py