Skip to content

Use Random Generator in SMC instead of global seeding #5131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions pymc/smc/sample_smc.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def sample_smc(
*,
start=None,
model=None,
random_seed=-1,
random_seed=None,
chains=None,
cores=None,
compute_convergence_checks=True,
Expand Down Expand Up @@ -191,15 +191,19 @@ def sample_smc(
cores = min(chains, cores)

if random_seed == -1:
raise FutureWarning(
f"random_seed should be a non-negative integer or None, got: {random_seed}"
"This will raise a ValueError in the Future"
)
random_seed = None
if chains == 1 and isinstance(random_seed, int):
random_seed = [random_seed]
if random_seed is None or isinstance(random_seed, int):
if random_seed is not None:
np.random.seed(random_seed)
random_seed = [np.random.randint(2 ** 30) for _ in range(chains)]
if not isinstance(random_seed, Iterable):
raise TypeError("Invalid value for `random_seed`. Must be tuple, list or int")
if isinstance(random_seed, int) or random_seed is None:
rng = np.random.default_rng(seed=random_seed)
random_seed = list(rng.integers(2 ** 30, size=chains))
elif isinstance(random_seed, Iterable):
if len(random_seed) != chains:
raise ValueError(f"Length of seeds ({len(seeds)}) must match number of chains {chains}")
else:
raise TypeError("Invalid value for `random_seed`. Must be tuple, list, int or None")

model = modelcontext(model)

Expand Down
22 changes: 11 additions & 11 deletions pymc/smc/smc.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def __init__(
draws=2000,
start=None,
model=None,
random_seed=-1,
random_seed=None,
threshold=0.5,
):
"""
Expand All @@ -140,6 +140,8 @@ def __init__(
Starting point in parameter space. It should be a list of dict with length `chains`.
When None (default) the starting point is sampled from the prior distribution.
model: Model (optional if in ``with`` context)).
random_seed: int
Value used to initialize the random number generator.
threshold: float
Determines the change of beta from stage to stage, i.e.indirectly the number of stages,
the higher the value of `threshold` the higher the number of stages. Defaults to 0.5.
Expand All @@ -151,10 +153,7 @@ def __init__(
self.start = start
self.threshold = threshold
self.model = model
self.random_seed = random_seed

if self.random_seed != -1:
np.random.seed(self.random_seed)
self.rng = np.random.default_rng(seed=random_seed)

self.model = modelcontext(model)
self.variables = inputvars(self.model.value_vars)
Expand Down Expand Up @@ -190,7 +189,7 @@ def _initialize_kernel(self):

"""
# Create dictionary that stores original variables shape and size
initial_point = self.model.initial_point
initial_point = self.model.recompute_initial_point(seed=self.rng.integers(2 ** 30))
for v in self.variables:
self.var_info[v.name] = (initial_point[v.name].shape, initial_point[v.name].size)

Expand Down Expand Up @@ -262,7 +261,7 @@ def update_beta_and_weights(self):

def resample(self):
"""Resample particles based on importance weights"""
self.resampling_indexes = np.random.choice(
self.resampling_indexes = self.rng.choice(
np.arange(self.draws), size=self.draws, p=self.weights
)

Expand Down Expand Up @@ -382,11 +381,11 @@ def mutate(self):
ac_ = np.empty((self.n_steps, self.draws))

cov = self.proposal_dist.cov
log_R = np.log(np.random.rand(self.n_steps, self.draws))
log_R = np.log(self.rng.random((self.n_steps, self.draws)))
for n_step in range(self.n_steps):
# The proposal is independent from the current point.
# We have to take that into account to compute the Metropolis-Hastings acceptance
proposal = floatX(self.proposal_dist.rvs(size=self.draws))
proposal = floatX(self.proposal_dist.rvs(size=self.draws, random_state=self.rng))
proposal = proposal.reshape(len(proposal), -1)
# To do that we compute the logp of moving to a new point
forward = self.proposal_dist.logpdf(proposal)
Expand Down Expand Up @@ -497,11 +496,12 @@ def mutate(self):
"""Metropolis-Hastings perturbation."""
ac_ = np.empty((self.n_steps, self.draws))

log_R = np.log(np.random.rand(self.n_steps, self.draws))
log_R = np.log(self.rng.random((self.n_steps, self.draws)))
for n_step in range(self.n_steps):
proposal = floatX(
self.tempered_posterior
+ self.proposal_dist(num_draws=self.draws) * self.proposal_scales[:, None]
+ self.proposal_dist(num_draws=self.draws, rng=self.rng)
* self.proposal_scales[:, None]
)
ll = np.array([self.likelihood_logp_func(prop) for prop in proposal])
pl = np.array([self.prior_logp_func(prop) for prop in proposal])
Expand Down
50 changes: 32 additions & 18 deletions pymc/step_methods/metropolis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Callable, Dict, List, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np
import numpy.random as nr
Expand Down Expand Up @@ -50,50 +50,64 @@


class Proposal:
def __init__(self, s):
def __init__(self, s, rng_seed: Optional[int] = None):
self.s = s
self.rng = np.random.default_rng(rng_seed)


class NormalProposal(Proposal):
def __call__(self):
return nr.normal(scale=self.s)
def __call__(self, rng: Optional[np.random.Generator] = None):
if rng is None:
rng = self.rng
return rng.normal(scale=self.s)


class UniformProposal(Proposal):
def __call__(self):
return nr.uniform(low=-self.s, high=self.s, size=len(self.s))
def __call__(self, rng: Optional[np.random.Generator] = None):
if rng is None:
rng = self.rng
return rng.uniform(low=-self.s, high=self.s, size=len(self.s))


class CauchyProposal(Proposal):
def __call__(self):
return nr.standard_cauchy(size=np.size(self.s)) * self.s
def __call__(self, rng: Optional[np.random.Generator] = None):
if rng is None:
rng = self.rng
return rng.standard_cauchy(size=np.size(self.s)) * self.s


class LaplaceProposal(Proposal):
def __call__(self):
def __call__(self, rng: Optional[np.random.Generator] = None):
if rng is None:
rng = self.rng
size = np.size(self.s)
return (nr.standard_exponential(size=size) - nr.standard_exponential(size=size)) * self.s
return (rng.standard_exponential(size=size) - rng.standard_exponential(size=size)) * self.s


class PoissonProposal(Proposal):
def __call__(self):
return nr.poisson(lam=self.s, size=np.size(self.s)) - self.s
def __call__(self, rng: Optional[np.random.Generator] = None):
if rng is None:
rng = self.rng
return rng.poisson(lam=self.s, size=np.size(self.s)) - self.s


class MultivariateNormalProposal(Proposal):
def __init__(self, s):
n, m = s.shape
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
n, m = self.s.shape
if n != m:
raise ValueError("Covariance matrix is not symmetric.")
self.n = n
self.chol = scipy.linalg.cholesky(s, lower=True)
self.chol = scipy.linalg.cholesky(self.s, lower=True)

def __call__(self, num_draws=None):
def __call__(self, num_draws=None, rng: Optional[np.random.Generator] = None):
if rng is None:
rng = self.rng
if num_draws is not None:
b = np.random.randn(self.n, num_draws)
b = rng.normal(size=(self.n, num_draws))
return np.dot(self.chol, b).T
else:
b = np.random.randn(self.n)
b = rng.normal(size=self.n)
return np.dot(self.chol, b)


Expand Down
9 changes: 9 additions & 0 deletions pymc/tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from logging.handlers import BufferingHandler

import aesara
import numpy as np
import numpy.random as nr

from aesara.gradient import verify_grad as at_verify_grad
Expand Down Expand Up @@ -123,3 +124,11 @@ def verify_grad(op, pt, n_tests=2, rng=None, *args, **kwargs):
if rng is None:
rng = nr.RandomState(411342)
at_verify_grad(op, pt, n_tests, rng, *args, **kwargs)


def assert_random_state_equal(state1, state2):
for field1, field2 in zip(state1, state2):
if isinstance(field1, np.ndarray):
np.testing.assert_array_equal(field1, field2)
else:
assert field1 == field2
7 changes: 6 additions & 1 deletion pymc/tests/test_smc.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from pymc.aesaraf import floatX
from pymc.backends.base import MultiTrace
from pymc.smc.smc import IMH
from pymc.tests.helpers import SeededTest
from pymc.tests.helpers import SeededTest, assert_random_state_equal


class TestSMC(SeededTest):
Expand Down Expand Up @@ -77,8 +77,10 @@ def two_gaussians(x):
y = pm.Normal("y", x, 1, observed=0)

def test_sample(self):
initial_rng_state = np.random.get_state()
with self.SMC_test:
mtrace = pm.sample_smc(draws=self.samples, return_inferencedata=False)
assert_random_state_equal(initial_rng_state, np.random.get_state())

x = mtrace["X"]
mu1d = np.abs(x).mean(axis=0)
Expand Down Expand Up @@ -531,11 +533,14 @@ def test_named_model(self):
class TestMHKernel(SeededTest):
def test_normal_model(self):
data = st.norm(10, 0.5).rvs(1000, random_state=self.get_random_state())

initial_rng_state = np.random.get_state()
with pm.Model() as m:
mu = pm.Normal("mu", 0, 3)
sigma = pm.HalfNormal("sigma", 1)
y = pm.Normal("y", mu, sigma, observed=data)
idata = pm.sample_smc(draws=2000, kernel=pm.smc.MH)
assert_random_state_equal(initial_rng_state, np.random.get_state())

post = idata.posterior.stack(sample=("chain", "draw"))
assert np.abs(post["mu"].mean() - 10) < 0.1
Expand Down