
Commit c98c5d7

revert removing test_examples

1 parent 6af422b commit c98c5d7

File tree

1 file changed: +382 -0 lines changed

pymc3/tests/test_examples.py

Lines changed: 382 additions & 0 deletions
@@ -0,0 +1,382 @@
# Copyright 2020 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import matplotlib
import numpy as np
import pandas as pd
import pytest
import theano
import theano.tensor as tt

from packaging import version

import pymc3 as pm

from pymc3.tests.helpers import SeededTest
from pymc3.theanof import floatX

if version.parse(matplotlib.__version__) < version.parse("3.3"):
    matplotlib.use("Agg", warn=False)
else:
    matplotlib.use("Agg")


def get_city_data():
    """Helper to get city data"""
    data = pd.read_csv(pm.get_data("srrs2.dat"))
    cty_data = pd.read_csv(pm.get_data("cty.dat"))

    data = data[data.state == "MN"]

    data["fips"] = data.stfips * 1000 + data.cntyfips
    cty_data["fips"] = cty_data.stfips * 1000 + cty_data.ctfips
    data["lradon"] = np.log(np.where(data.activity == 0, 0.1, data.activity))
    data = data.merge(cty_data, "inner", on="fips")

    unique = data[["fips"]].drop_duplicates()
    unique["group"] = np.arange(len(unique))
    return data.merge(unique, "inner", on="fips")


class TestARM5_4(SeededTest):
    def build_model(self):
        data = pd.read_csv(
            pm.get_data("wells.dat"),
            delimiter=" ",
            index_col="id",
            dtype={"switch": np.int8},
        )
        data.dist /= 100
        data.educ /= 4
        col = data.columns
        P = data[col[1:]]
        P -= P.mean()
        P["1"] = 1

        with pm.Model() as model:
            effects = pm.Normal("effects", mu=0, sigma=100, shape=len(P.columns))
            logit_p = tt.dot(floatX(np.array(P)), effects)
            pm.Bernoulli("s", logit_p=logit_p, observed=floatX(data.switch.values))
        return model

    def test_run(self):
        model = self.build_model()
        with model:
            pm.sample(50, tune=50)


class TestARM12_6(SeededTest):
    def build_model(self):
        data = get_city_data()

        self.obs_means = data.groupby("fips").lradon.mean().to_numpy()

        lradon = data.lradon.to_numpy()
        floor = data.floor.to_numpy()
        group = data.group.to_numpy()

        with pm.Model() as model:
            groupmean = pm.Normal("groupmean", 0, 10.0 ** -2.0)
            groupsd = pm.Uniform("groupsd", 0, 10.0)
            sd = pm.Uniform("sd", 0, 10.0)
            floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0)
            means = pm.Normal("means", groupmean, groupsd ** -2.0, shape=len(self.obs_means))
            pm.Normal("lr", floor * floor_m + means[group], sd ** -2.0, observed=lradon)
        return model

    def too_slow(self):
        model = self.build_model()
        start = {
            "groupmean": self.obs_means.mean(),
            "groupsd_interval__": 0,
            "sd_interval__": 0,
            "means": self.obs_means,
            "floor_m": 0.0,
        }
        with model:
            start = pm.find_MAP(
                start=start,
                vars=[model["groupmean"], model["sd_interval__"], model["floor_m"]],
            )
            step = pm.NUTS(model.vars, scaling=start)
            pm.sample(50, step=step, start=start)


class TestARM12_6Uranium(SeededTest):
    def build_model(self):
        data = get_city_data()
        self.obs_means = data.groupby("fips").lradon.mean()

        lradon = data.lradon.to_numpy()
        floor = data.floor.to_numpy()
        group = data.group.to_numpy()
        ufull = data.Uppm.to_numpy()

        with pm.Model() as model:
            groupmean = pm.Normal("groupmean", 0, 10.0 ** -2.0)
            groupsd = pm.Uniform("groupsd", 0, 10.0)
            sd = pm.Uniform("sd", 0, 10.0)
            floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0)
            u_m = pm.Normal("u_m", 0, 5.0 ** -2)
            means = pm.Normal("means", groupmean, groupsd ** -2.0, shape=len(self.obs_means))
            pm.Normal(
                "lr",
                floor * floor_m + means[group] + ufull * u_m,
                sd ** -2.0,
                observed=lradon,
            )
        return model

    def too_slow(self):
        model = self.build_model()
        with model:
            start = pm.Point(
                {
                    "groupmean": self.obs_means.mean(),
                    "groupsd_interval__": 0,
                    "sd_interval__": 0,
                    "means": np.array(self.obs_means),
                    "u_m": np.array([0.72]),
                    "floor_m": 0.0,
                }
            )

            start = pm.find_MAP(start, model.vars[:-1])
            H = model.fastd2logp()
            h = np.diag(H(start))

            step = pm.HamiltonianMC(model.vars, h)
            pm.sample(50, step=step, start=start)


def build_disaster_model(masked=False):
    # fmt: off
    disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                               3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                               2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
                               1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                               0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                               3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                               0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
    # fmt: on
    if masked:
        disasters_data[[23, 68]] = -1
        disasters_data = np.ma.masked_values(disasters_data, value=-1)
    years = len(disasters_data)

    with pm.Model() as model:
        # Prior for distribution of switchpoint location
        switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
        # Priors for pre- and post-switch mean number of disasters
        early_mean = pm.Exponential("early_mean", lam=1.0)
        late_mean = pm.Exponential("late_mean", lam=1.0)
        # Allocate appropriate Poisson rates to years before and after current
        # switchpoint location
        idx = np.arange(years)
        rate = tt.switch(switchpoint >= idx, early_mean, late_mean)
        # Data likelihood
        pm.Poisson("disasters", rate, observed=disasters_data)
    return model
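
The rate vector above is built with tt.switch, which selects early_mean for years at or before the switchpoint and late_mean afterwards. A minimal standalone sketch of that behaviour (illustrative only; the constants are made-up stand-ins for the model's random variables):

    import numpy as np
    import theano.tensor as tt

    idx = np.arange(5)
    rate = tt.switch(2 >= idx, 10.0, 1.0)  # hypothetical switchpoint = 2
    print(rate.eval())  # [10. 10. 10.  1.  1.]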
192+
193+
194+
@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
195+
class TestDisasterModel(SeededTest):
196+
# Time series of recorded coal mining disasters in the UK from 1851 to 1962
197+
def test_disaster_model(self):
198+
model = build_disaster_model(masked=False)
199+
with model:
200+
# Initial values for stochastic nodes
201+
start = {"early_mean": 2.0, "late_mean": 3.0}
202+
# Use slice sampler for means (other variables auto-selected)
203+
step = pm.Slice([model.early_mean_log__, model.late_mean_log__])
204+
tr = pm.sample(500, tune=50, start=start, step=step, chains=2)
205+
pm.summary(tr)
206+
207+
def test_disaster_model_missing(self):
208+
model = build_disaster_model(masked=True)
209+
with model:
210+
# Initial values for stochastic nodes
211+
start = {"early_mean": 2.0, "late_mean": 3.0}
212+
# Use slice sampler for means (other variables auto-selected)
213+
step = pm.Slice([model.early_mean_log__, model.late_mean_log__])
214+
tr = pm.sample(500, tune=50, start=start, step=step, chains=2)
215+
pm.summary(tr)
216+
217+
218+
class TestGLMLinear(SeededTest):
219+
def build_model(self):
220+
size = 50
221+
true_intercept = 1
222+
true_slope = 2
223+
self.x = np.linspace(0, 1, size)
224+
self.y = true_intercept + self.x * true_slope + np.random.normal(scale=0.5, size=size)
225+
data = dict(x=self.x, y=self.y)
226+
with pm.Model() as model:
227+
pm.GLM.from_formula("y ~ x", data)
228+
return model
229+
230+
def test_run(self):
231+
with self.build_model():
232+
start = pm.find_MAP(method="Powell")
233+
pm.sample(50, pm.Slice(), start=start)
234+
235+
236+
class TestLatentOccupancy(SeededTest):
237+
"""
238+
From the PyMC example list
239+
latent_occupancy.py
240+
Simple model demonstrating the estimation of occupancy, using latent variables. Suppose
241+
a population of n sites, with some proportion pi being occupied. Each site is surveyed,
242+
yielding an array of counts, y:
243+
y = [3, 0, 0, 2, 1, 0, 1, 0, ..., ]
244+
This is a classic zero-inflated count problem, where more zeros appear in the data than would
245+
be predicted by a simple Poisson model. We have, in fact, a mixture of models; one, conditional
246+
on occupancy, with a poisson mean of theta, and another, conditional on absence, with mean zero.
247+
One way to tackle the problem is to model the latent state of 'occupancy' as a Bernoulli
248+
variable at each site, with some unknown probability:
249+
z_i ~ Bern(pi)
250+
These latent variables can then be used to generate an array of Poisson parameters:
251+
t_i = theta (if z_i=1) or 0 (if z_i=0)
252+
Hence, the likelihood is just:
253+
y_i = Poisson(t_i)
254+
(Note in this elementary model, we are ignoring the issue of imperfect detection.)
255+
Created by Chris Fonnesbeck on 2008-07-28.
256+
Copyright (c) 2008 University of Otago. All rights reserved.
257+
"""
258+
259+
def setup_method(self):
260+
super().setup_method()
261+
# Sample size
262+
n = 100
263+
# True mean count, given occupancy
264+
theta = 2.1
265+
# True occupancy
266+
pi = 0.4
267+
# Simulate some data data
268+
self.y = ((np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n)).astype("int16")
269+
270+
def build_model(self):
271+
with pm.Model() as model:
272+
# Estimated occupancy
273+
psi = pm.Beta("psi", 1, 1)
274+
# Latent variable for occupancy
275+
pm.Bernoulli("z", psi, shape=self.y.shape)
276+
# Estimated mean count
277+
theta = pm.Uniform("theta", 0, 100)
278+
# Poisson likelihood
279+
pm.ZeroInflatedPoisson("y", psi, theta, observed=self.y)
280+
return model
281+
282+
def test_run(self):
283+
model = self.build_model()
284+
with model:
285+
start = {
286+
"psi": np.array(0.5, dtype="f"),
287+
"z": (self.y > 0).astype("int16"),
288+
"theta": np.array(5, dtype="f"),
289+
}
290+
step_one = pm.Metropolis([model.theta_interval__, model.psi_logodds__])
291+
step_two = pm.BinaryMetropolis([model.z])
292+
pm.sample(50, step=[step_one, step_two], start=start, chains=1)
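
The mixture in the docstring implies P(y = 0) = (1 - psi) + psi * exp(-theta), which is easy to sanity-check by simulation. A minimal sketch (illustrative only; psi and theta reuse the true values from setup_method, with a larger n for a stable estimate):

    import numpy as np

    psi, theta, n = 0.4, 2.1, 100_000
    z = np.random.random(n) < psi        # z_i ~ Bern(psi)
    y = z * np.random.poisson(theta, n)  # t_i = theta if z_i = 1 else 0
    expected = (1 - psi) + psi * np.exp(-theta)  # about 0.649
    print(abs((y == 0).mean() - expected) < 0.01)  # True with high probability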


@pytest.mark.xfail(
    condition=(theano.config.floatX == "float32"),
    reason="Fails on float32 due to inf at the starting logP",
)
class TestRSV(SeededTest):
    """
    This model estimates the population prevalence of respiratory syncytial virus
    (RSV) among children in Amman, Jordan, based on 3 years of admissions to Al
    Bashir hospital with an RSV diagnosis.

    To estimate this parameter from raw counts of diagnoses, we need to establish
    the population of 1-year-old children from which the diagnosed individuals
    were sampled. This involves correcting census data (the national estimate of
    1-year-olds) for the proportion of the population in the city, as well as for
    the market share of the hospital. The latter is based on an expert estimate,
    and hence encoded as a prior.
    """

    def build_model(self):
        # 1-year-old children in Jordan
        kids = np.array([180489, 191817, 190830])
        # Proportion of population in Amman
        amman_prop = 0.35
        # Infant RSV cases in Al Bashir hospital
        rsv_cases = np.array([40, 59, 65])
        with pm.Model() as model:
            # Al Bashir hospital market share
            market_share = pm.Uniform("market_share", 0.5, 0.6)
            # Number of 1 y.o. in Amman
            n_amman = pm.Binomial("n_amman", kids, amman_prop, shape=3)
            # Prior probability
            prev_rsv = pm.Beta("prev_rsv", 1, 5, shape=3)
            # RSV in Amman
            y_amman = pm.Binomial("y_amman", n_amman, prev_rsv, shape=3, testval=100)
            # Likelihood for number with RSV in hospital (assumes Pr(hosp | RSV) = 1)
            pm.Binomial("y_hosp", y_amman, market_share, observed=rsv_cases)
        return model

    def test_run(self):
        with self.build_model():
            pm.sample(50, step=[pm.NUTS(), pm.Metropolis()])
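
The correction described in the docstring gives a quick order-of-magnitude check on prev_rsv: prevalence is roughly cases / (kids * amman_prop * market_share). A rough point-estimate sketch (illustrative only; 0.55 is simply the midpoint of the Uniform(0.5, 0.6) market-share prior):

    import numpy as np

    kids = np.array([180489, 191817, 190830])
    rsv_cases = np.array([40, 59, 65])
    print(rsv_cases / (kids * 0.35 * 0.55))  # roughly [0.0012 0.0016 0.0018]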


class TestMultilevelNormal(SeededTest):
    """
    Toy three-level normal model sampled using MLDA. The finest model is a
    Normal distribution with unknown mean and sigma=1.0, where we have only one
    observed datum (y = 11.0). The coarse models are the same but with the
    observed datum changed to y = 11.5 and y = 12.0. This is a very simple way
    to create a 3-level system of "approximate" coarse models.
    """

    def build_models(self):
        np.random.seed(1234)
        true_mean = 11.0
        y = np.array([true_mean])

        with pm.Model() as model_coarse_0:
            sigma = 1.0
            x_coeff = pm.Normal("x", true_mean, sigma=10.0)
            pm.Normal("y", mu=x_coeff, sigma=sigma, observed=y + 1.0)

        with pm.Model() as model_coarse_1:
            sigma = 1.0
            x_coeff = pm.Normal("x", true_mean, sigma=10.0)
            pm.Normal("y", mu=x_coeff, sigma=sigma, observed=y + 0.5)

        coarse_models = [model_coarse_0, model_coarse_1]

        with pm.Model() as model:
            sigma = 1.0
            x_coeff = pm.Normal("x", true_mean, sigma=10.0)
            pm.Normal("y", mu=x_coeff, sigma=sigma, observed=y)

        return model, coarse_models

    def test_run(self):
        model, coarse_models = self.build_models()

        with model:
            step = pm.MLDA(subsampling_rates=2, coarse_models=coarse_models)
            pm.sample(draws=50, chains=2, tune=50, step=step)

            step = pm.MLDA(
                subsampling_rates=2, coarse_models=coarse_models, base_sampler="Metropolis"
            )
            pm.sample(draws=50, chains=2, tune=50, step=step)
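
In the three-level setup above, MLDA recurses through the coarse_models list, which is ordered from the coarsest model (model_coarse_0, the most biased data) up to the finest coarse level (model_coarse_1). To my understanding of the PyMC3 API, subsampling_rates may also be a list with one entry per coarse level; a hedged variation on the test, assuming that list form is accepted:

    with model:
        step = pm.MLDA(subsampling_rates=[5, 3], coarse_models=coarse_models)
        pm.sample(draws=50, chains=2, tune=50, step=step)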
