Skip to content

Commit 8191b49

Browse files
lucianopaztwiecki
authored andcommitted
Fixed categorical logp with tt.choose (#3563)
* Added tests for issue * Fix for #3535 * Added release notes
1 parent 4f67d63 commit 8191b49

File tree

3 files changed

+44
-5
lines changed

3 files changed

+44
-5
lines changed

RELEASE-NOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
### Maintenance
1313
- Moved math operations out of `Rice`, `TruncatedNormal`, `Triangular` and `ZeroInflatedNegativeBinomial` `random` methods. Math operations on values returned by `draw_values` might not broadcast well, and all the `size` aware broadcasting is left to `generate_samples`. Fixes [#3481](https://github.com/pymc-devs/pymc3/issues/3481) and [#3508](https://github.com/pymc-devs/pymc3/issues/3508)
14+
- Fixed a bug in `Categorical.logp`. In the case of multidimensional `p`'s, the indexing was done wrong leading to incorrectly shaped tensors that consumed `O(n**2)` memory instead of `O(n)`. This fixes issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535)
15+
- Fixed a defect in `OrderedLogistic.__init__` that unnecessarily increased the dimensionality of the underlying `p`. Related to issue issue [#3535](https://github.com/pymc-devs/pymc3/issues/3535) but was not the true cause of it.
1416

1517
## PyMC3 3.7 (May 29 2019)
1618

pymc3/distributions/discrete.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -998,7 +998,12 @@ def logp(self, value):
998998

999999
if p.ndim > 1:
10001000
pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
1001-
a = tt.log(p.dimshuffle(pattern)[value_clip])
1001+
a = tt.log(
1002+
tt.choose(
1003+
value_clip,
1004+
p.dimshuffle(pattern),
1005+
)
1006+
)
10021007
else:
10031008
a = tt.log(p[value_clip])
10041009

@@ -1570,13 +1575,13 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
15701575
self.eta = tt.as_tensor_variable(floatX(eta))
15711576
self.cutpoints = tt.as_tensor_variable(cutpoints)
15721577

1573-
pa = sigmoid(tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta))
1578+
pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta))
15741579
p_cum = tt.concatenate([
1575-
tt.zeros_like(tt.shape_padright(pa[:, 0])),
1580+
tt.zeros_like(tt.shape_padright(pa[..., 0])),
15761581
pa,
1577-
tt.ones_like(tt.shape_padright(pa[:, 0]))
1582+
tt.ones_like(tt.shape_padright(pa[..., 0]))
15781583
], axis=-1)
1579-
p = p_cum[:, 1:] - p_cum[:, :-1]
1584+
p = p_cum[..., 1:] - p_cum[..., :-1]
15801585

15811586
super().__init__(p=p, *args, **kwargs)
15821587

pymc3/tests/test_distributions.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,3 +1335,35 @@ def test_discrete_trafo():
13351335
with pytest.raises(ValueError) as err:
13361336
Binomial('a', n=5, p=0.5, transform='log')
13371337
err.match('Transformations for discrete distributions')
1338+
1339+
1340+
@pytest.mark.parametrize("shape", [tuple(), (1,), (3, 1), (3, 2)], ids=str)
1341+
def test_orderedlogistic_dimensions(shape):
1342+
# Test for issue #3535
1343+
loge = np.log10(np.exp(1))
1344+
size = 7
1345+
p = np.ones(shape + (10,)) / 10
1346+
cutpoints = np.tile(logit(np.linspace(0, 1, 11)[1:-1]), shape + (1,))
1347+
obs = np.random.randint(0, 1, size=(size,) + shape)
1348+
with Model():
1349+
ol = OrderedLogistic(
1350+
"ol",
1351+
eta=np.zeros(shape),
1352+
cutpoints=cutpoints,
1353+
shape=shape,
1354+
observed=obs
1355+
)
1356+
c = Categorical(
1357+
"c",
1358+
p=p,
1359+
shape=shape,
1360+
observed=obs
1361+
)
1362+
ologp = ol.logp({"ol": 1}) * loge
1363+
clogp = c.logp({"c": 1}) * loge
1364+
expected = -np.prod((size,) + shape)
1365+
1366+
assert c.distribution.p.ndim == (len(shape) + 1)
1367+
assert np.allclose(clogp, expected)
1368+
assert ol.distribution.p.ndim == (len(shape) + 1)
1369+
assert np.allclose(ologp, expected)

0 commit comments

Comments
 (0)