Commit 10cb019

ricardoV94 authored and AlexAndorra committed
OrderedLogistic and OrderedProbit no longer subclass Distribution
This fixes a bug that occurred when auto-imputation takes place, in which case the logic used to retrieve `p` was no longer valid. It also makes `p` show up as an input of the underlying Categorical in the graphviz representation of the model.
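As a rough sketch of the behavior this enables (mirroring the new tests below, and assuming a PyMC version that includes this change), a model with a partially observed ordinal outcome now builds correctly and registers the computed probabilities as a named Deterministic:

import numpy as np
import pymc as pm

with pm.Model() as m:
    # One observation is missing, so PyMC's auto-imputation kicks in.
    pm.OrderedLogistic(
        "ol", eta=0, cutpoints=np.array([-2, 0, 2]), observed=[0, np.nan, 1]
    )

# Two deterministics are expected: "ol_probs" (from compute_p=True, the default)
# and the one created by automatic imputation of the missing value.
assert len(m.deterministics) == 2

# pm.model_to_graphviz(m) should now show "ol_probs" as an input
# of the underlying Categorical.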
1 parent 08f576c commit 10cb019

2 files changed (+94 −97 lines)


pymc/distributions/discrete.py (43 additions, 54 deletions)

@@ -1197,30 +1197,9 @@ class _OrderedLogistic(Categorical):
     See docs for the OrderedLogistic wrapper class for more details on how to use it in models.
     """

-    rv_op = categorical
-
-    @classmethod
-    def dist(cls, eta, cutpoints, *args, **kwargs):
-        eta = pt.as_tensor_variable(eta)
-        cutpoints = pt.as_tensor_variable(cutpoints)
-
-        pa = sigmoid(cutpoints - pt.shape_padright(eta))
-        p_cum = pt.concatenate(
-            [
-                pt.zeros_like(pt.shape_padright(pa[..., 0])),
-                pa,
-                pt.ones_like(pt.shape_padright(pa[..., 0])),
-            ],
-            axis=-1,
-        )
-        p = p_cum[..., 1:] - p_cum[..., :-1]
-
-        return super().dist(p, *args, **kwargs)
-

 class OrderedLogistic:
-    R"""
-    Wrapper class for Ordered Logistic distributions.
+    R"""Ordered Logistic distribution.

     Useful for regression on ordinal data values whose values range
     from 1 to K as a function of some predictor, :math:`\eta`. The

@@ -1287,50 +1266,39 @@ class OrderedLogistic:
         plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k');
     """

-    def __new__(cls, name, *args, compute_p=True, **kwargs):
-        out_rv = _OrderedLogistic(name, *args, **kwargs)
+    def __new__(cls, name, eta, cutpoints, compute_p=True, **kwargs):
+        p = cls.compute_p(eta, cutpoints)
         if compute_p:
-            pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3], dims=kwargs.get("dims"))
+            p = pm.Deterministic(f"{name}_probs", p, dims=kwargs.get("dims"))
+        out_rv = Categorical(name, p=p, **kwargs)
         return out_rv

     @classmethod
-    def dist(cls, *args, **kwargs):
-        return _OrderedLogistic.dist(*args, **kwargs)
-
-
-class _OrderedProbit(Categorical):
-    r"""
-    Underlying class for ordered probit distributions.
-    See docs for the OrderedProbit wrapper class for more details on how to use it in models.
-    """
-
-    rv_op = categorical
+    def dist(cls, eta, cutpoints, **kwargs):
+        p = cls.compute_p(eta, cutpoints)
+        return Categorical.dist(p=p, **kwargs)

     @classmethod
-    def dist(cls, eta, cutpoints, sigma=1, *args, **kwargs):
+    def compute_p(cls, eta, cutpoints):
         eta = pt.as_tensor_variable(eta)
         cutpoints = pt.as_tensor_variable(cutpoints)

-        probits = pt.shape_padright(eta) - cutpoints
-        _log_p = pt.concatenate(
+        pa = sigmoid(cutpoints - pt.shape_padright(eta))
+        p_cum = pt.concatenate(
             [
-                pt.shape_padright(normal_lccdf(0, sigma, probits[..., 0])),
-                log_diff_normal_cdf(
-                    0, pt.shape_padright(sigma), probits[..., :-1], probits[..., 1:]
-                ),
-                pt.shape_padright(normal_lcdf(0, sigma, probits[..., -1])),
+                pt.zeros_like(pt.shape_padright(pa[..., 0])),
+                pa,
+                pt.ones_like(pt.shape_padright(pa[..., 0])),
             ],
             axis=-1,
         )
-        _log_p = pt.as_tensor_variable(_log_p)
-        p = pt.exp(_log_p)
-
-        return super().dist(p, *args, **kwargs)
+        p = p_cum[..., 1:] - p_cum[..., :-1]
+        return p


 class OrderedProbit:
     R"""
-    Wrapper class for Ordered Probit distributions.
+    Ordered Probit distributions.

     Useful for regression on ordinal data values whose values range
     from 1 to K as a function of some predictor, :math:`\eta`. The

@@ -1402,12 +1370,33 @@ class OrderedProbit:
         plt.hist(posterior["cutpoints"][1], 80, alpha=0.2, color='k');
     """

-    def __new__(cls, name, *args, compute_p=True, **kwargs):
-        out_rv = _OrderedProbit(name, *args, **kwargs)
+    def __new__(cls, name, eta, cutpoints, sigma=1, compute_p=True, **kwargs):
+        p = cls.compute_p(eta, cutpoints, sigma)
         if compute_p:
-            pm.Deterministic(f"{name}_probs", out_rv.owner.inputs[3], dims=kwargs.get("dims"))
+            p = pm.Deterministic(f"{name}_probs", p, dims=kwargs.get("dims"))
+        out_rv = Categorical(name, p=p, **kwargs)
         return out_rv

     @classmethod
-    def dist(cls, *args, **kwargs):
-        return _OrderedProbit.dist(*args, **kwargs)
+    def dist(cls, eta, cutpoints, sigma=1, **kwargs):
+        p = cls.compute_p(eta, cutpoints, sigma)
+        return Categorical.dist(p=p, **kwargs)
+
+    @classmethod
+    def compute_p(cls, eta, cutpoints, sigma):
+        eta = pt.as_tensor_variable(eta)
+        cutpoints = pt.as_tensor_variable(cutpoints)
+
+        probits = pt.shape_padright(eta) - cutpoints
+        log_p = pt.concatenate(
+            [
+                pt.shape_padright(normal_lccdf(0, sigma, probits[..., 0])),
+                log_diff_normal_cdf(
+                    0, pt.shape_padright(sigma), probits[..., :-1], probits[..., 1:]
+                ),
+                pt.shape_padright(normal_lcdf(0, sigma, probits[..., -1])),
+            ],
+            axis=-1,
+        )
+        p = pt.exp(log_p)
+        return p
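As a quick sanity check of what the new compute_p builds symbolically for the logistic case, the category probabilities can be reproduced with NumPy/SciPy. This is a standalone sketch, not part of the commit; scipy.special.expit is the logistic sigmoid:

import numpy as np
from scipy.special import expit  # logistic sigmoid

eta = 0.0
cutpoints = np.array([-2.0, 0.0, 2.0])

# Cumulative probabilities P(Y <= k) = sigmoid(c_k - eta), padded with 0 and 1,
# then differenced to obtain the per-category probabilities.
p_cum = np.concatenate(([0.0], expit(cutpoints - eta), [1.0]))
p = np.diff(p_cum)
print(p)  # ~[0.1192, 0.3808, 0.3808, 0.1192], the expected_p used in TestOrderedLogistic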

tests/distributions/test_discrete.py (51 additions, 43 deletions)

@@ -29,7 +29,7 @@

 import pymc as pm

-from pymc.distributions.discrete import _OrderedLogistic, _OrderedProbit
+from pymc.distributions.discrete import OrderedLogistic, OrderedProbit
 from pymc.logprob.basic import icdf, logcdf, logp
 from pymc.logprob.utils import ParameterValueError
 from pymc.pytensorf import floatX

@@ -481,26 +481,6 @@ def test_orderedlogistic_dimensions(shape):
     assert np.allclose(ologp, expected)


-def test_ordered_logistic_probs():
-    with pm.Model() as m:
-        pm.OrderedLogistic("ol_p", cutpoints=np.array([-2, 0, 2]), eta=0)
-        pm.OrderedLogistic("ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False)
-    assert len(m.deterministics) == 1
-
-    x = pm.OrderedLogistic.dist(cutpoints=np.array([-2, 0, 2]), eta=0)
-    assert isinstance(x, TensorVariable)
-
-
-def test_ordered_probit_probs():
-    with pm.Model() as m:
-        pm.OrderedProbit("op_p", cutpoints=np.array([-2, 0, 2]), eta=0, sigma=1)
-        pm.OrderedProbit("op_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, sigma=1, compute_p=False)
-    assert len(m.deterministics) == 1
-
-    x = pm.OrderedProbit.dist(cutpoints=np.array([-2, 0, 2]), eta=0, sigma=1)
-    assert isinstance(x, TensorVariable)
-
-
 class TestMoments:
     @pytest.mark.parametrize(
         "p, size, expected",

@@ -857,14 +837,12 @@ def test_implied_degenerate_shape(self):
         assert x.eval().shape == (1,)


-class TestOrderedLogistic(BaseTestDistributionRandom):
-    pymc_dist = _OrderedLogistic
-    pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])}
-    expected_rv_op_params = {"p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])}
-    checks_to_run = [
-        "check_pymc_params_match_rv_op",
-        "check_rv_size",
-    ]
+class TestOrderedLogistic:
+    def test_expected_categorical(self):
+        categorical = OrderedLogistic.dist(eta=0, cutpoints=np.array([-2, 0, 2]))
+        p = categorical.owner.inputs[3].eval()
+        expected_p = np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])
+        np.testing.assert_allclose(p, expected_p)

     @pytest.mark.parametrize(
         "eta, cutpoints, expected",

@@ -881,22 +859,34 @@ def test_shape_inputs(self, eta, cutpoints, expected):
         """
         This test checks when providing different shapes for `eta` parameters.
         """
-        categorical = _OrderedLogistic.dist(
+        categorical = OrderedLogistic.dist(
             eta=eta,
             cutpoints=cutpoints,
         )
-        p = categorical.owner.inputs[3].eval()
-        assert p.shape == expected
+        p_shape = tuple(categorical.owner.inputs[-1].shape.eval())
+        assert p_shape == expected

+    def test_compute_p(self):
+        with pm.Model() as m:
+            pm.OrderedLogistic("ol_p", cutpoints=np.array([-2, 0, 2]), eta=0)
+            pm.OrderedLogistic("ol_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, compute_p=False)
+        assert len(m.deterministics) == 1

-class TestOrderedProbit(BaseTestDistributionRandom):
-    pymc_dist = _OrderedProbit
-    pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])}
-    expected_rv_op_params = {"p": np.array([0.02275013, 0.47724987, 0.47724987, 0.02275013])}
-    checks_to_run = [
-        "check_pymc_params_match_rv_op",
-        "check_rv_size",
-    ]
+        x = pm.OrderedLogistic.dist(cutpoints=np.array([-2, 0, 2]), eta=0)
+        assert isinstance(x, TensorVariable)
+
+        # Test it works with auto-imputation
+        with pm.Model() as m:
+            pm.OrderedLogistic("ol", cutpoints=np.array([-2, 0, 2]), eta=0, observed=[0, np.nan, 1])
+        assert len(m.deterministics) == 2  # One from the auto-imputation, the other from compute_p
+
+
+class TestOrderedProbit:
+    def test_expected_categorical(self):
+        categorical = OrderedProbit.dist(eta=0, cutpoints=np.array([-2, 0, 2]))
+        p = categorical.owner.inputs[3].eval()
+        expected_p = np.array([0.02275013, 0.47724987, 0.47724987, 0.02275013])
+        np.testing.assert_allclose(p, expected_p)

     @pytest.mark.parametrize(
         "eta, cutpoints, sigma, expected",

@@ -914,10 +904,28 @@ def test_shape_inputs(self, eta, cutpoints, sigma, expected):
         """
         This test checks when providing different shapes for `eta` and `sigma` parameters.
         """
-        categorical = _OrderedProbit.dist(
+        categorical = OrderedProbit.dist(
             eta=eta,
             cutpoints=cutpoints,
             sigma=sigma,
         )
-        p = categorical.owner.inputs[3].eval()
-        assert p.shape == expected
+        p_shape = tuple(categorical.owner.inputs[-1].shape.eval())
+        assert p_shape == expected
+
+    def test_compute_p(self):
+        with pm.Model() as m:
+            pm.OrderedProbit("op_p", cutpoints=np.array([-2, 0, 2]), eta=0, sigma=1)
+            pm.OrderedProbit(
+                "op_no_p", cutpoints=np.array([-2, 0, 2]), eta=0, sigma=1, compute_p=False
+            )
+        assert len(m.deterministics) == 1
+
+        x = pm.OrderedProbit.dist(cutpoints=np.array([-2, 0, 2]), eta=0, sigma=1)
+        assert isinstance(x, TensorVariable)
+
+        # Test it works with auto-imputation
+        with pm.Model() as m:
+            pm.OrderedProbit(
+                "op", cutpoints=np.array([-2, 0, 2]), eta=0, sigma=1, observed=[0, np.nan, 1]
+            )
+        assert len(m.deterministics) == 2  # One from the auto-imputation, the other from compute_p
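Similarly, the probit probabilities asserted in TestOrderedProbit.test_expected_categorical can be reproduced with plain normal CDFs. This is a standalone sketch; the commit's compute_p does the equivalent computation in log space via normal_lcdf / normal_lccdf / log_diff_normal_cdf for numerical stability:

import numpy as np
from scipy.stats import norm

eta, sigma = 0.0, 1.0
cutpoints = np.array([-2.0, 0.0, 2.0])

# P(Y = k) = Phi((c_k - eta) / sigma) - Phi((c_{k-1} - eta) / sigma),
# with c_{-1} = -inf and c_K = +inf supplying the 0 and 1 padding.
p_cum = np.concatenate(([0.0], norm.cdf(cutpoints - eta, scale=sigma), [1.0]))
p = np.diff(p_cum)
print(p)  # ~[0.02275, 0.47725, 0.47725, 0.02275], matching expected_p in TestOrderedProbit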
