forked from pymc-devs/pymc-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrankdata_ordered.py
64 lines (50 loc) · 1.67 KB
/
rankdata_ordered.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import arviz as az
import numpy as np
import theano.tensor as tt
import pymc3 as pm
"""
Using Ordered transformation to model ranking data
inspired by the Stan implementation of Thurstonian model
see http://discourse.mc-stan.org/t/thurstonian-model/1735/5
also see related discussion on PyMC3 discourse:
https://discourse.pymc.io/t/order-statistics-in-pymc3/617
"""
# data
K = 5 # number of items being ranked
J = 100 # number of raters
yreal = np.argsort(np.random.randn(1, K), axis=-1)
y = np.argsort(yreal + np.random.randn(J, K), axis=-1)
# transformed data
y_argsort = np.argsort(y, axis=-1)
with pm.Model() as m:
mu_hat = pm.Normal("mu_hat", 0, 1, shape=K - 1)
# set first value to 0 to avoid unidentified model
mu = tt.concatenate([[0.0], mu_hat])
# sd = pm.HalfCauchy('sigma', 1.)
latent = pm.Normal(
"latent",
mu=mu[y_argsort],
sigma=1.0, # using sd does not work yet
transform=pm.distributions.transforms.ordered,
shape=y_argsort.shape,
testval=np.repeat(np.arange(K)[:, None], J, axis=1).T,
)
# There are some problems using Ordered
# right now, you need to specify testval
def run(n=1500):
if n == "short":
n = 50
with m:
trace = pm.sample(n)
az.plot_trace(trace, var_names=["mu_hat"])
print("Example observed data: ")
print(y[:30, :].T)
print("The true ranking is: ")
print(yreal.flatten())
print("The Latent mean is: ")
latentmu = np.hstack(([0], az.summary(trace, var_names=["mu_hat"])["mean"].values))
print(np.round(latentmu, 2))
print("The estimated ranking is: ")
print(np.argsort(latentmu))
if __name__ == "__main__":
run()