diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 73cb1dfefb..068f9c2b52 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -5,6 +5,7 @@ ### New features - `Mixture` now supports mixtures of multidimensional probability distributions, not just lists of 1D distributions. +- `GLM.from_formula` and `LinearComponent.from_formula` can extract variables from the calling scope. The scope used is customizable via the new `eval_env` argument. Fixes #3382. ### Maintenance @@ -506,4 +507,3 @@ Thus, Thomas, Chris and I are pleased to announce that PyMC3 is now in Beta. * maahnman * paul sorenson * zenourn - diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py index 4781784e1a..aa25a51235 100644 --- a/pymc3/glm/linear.py +++ b/pymc3/glm/linear.py @@ -84,9 +84,22 @@ def __init__(self, x, y, intercept=True, labels=None, @classmethod def from_formula(cls, formula, data, priors=None, vars=None, - name='', model=None, offset=0.): + name='', model=None, offset=0., eval_env=0): + """Creates linear component from `patsy` formula. + + Parameters + ---------- + formula : str - a `patsy` formula + data : a dict-like object that can be used to look up variables referenced + in `formula` + eval_env : either a `patsy.EvalEnvironment` or else a depth represented as + an integer which will be passed to `patsy.EvalEnvironment.capture()`. + See `patsy.dmatrices` and `patsy.EvalEnvironment` for details. + Other arguments are documented in the constructor. 
+ """ import patsy - y, x = patsy.dmatrices(formula, data) + eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1) + y, x = patsy.dmatrices(formula, data, eval_env=eval_env) labels = x.design_info.column_names return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False, labels=labels, priors=priors, vars=vars, name=name, @@ -140,9 +153,23 @@ def __init__(self, x, y, intercept=True, labels=None, @classmethod def from_formula(cls, formula, data, priors=None, vars=None, family='normal', name='', - model=None, offset=0.): + model=None, offset=0., eval_env=0): + """ + Creates GLM from `patsy` formula. + + Parameters + ---------- + formula : str - a `patsy` formula + data : a dict-like object that can be used to look up variables referenced + in `formula` + eval_env : either a `patsy.EvalEnvironment` or else a depth represented as + an integer which will be passed to `patsy.EvalEnvironment.capture()`. + See `patsy.dmatrices` and `patsy.EvalEnvironment` for details. + Other arguments are documented in the constructor. 
+ """ import patsy - y, x = patsy.dmatrices(formula, data) + eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1) + y, x = patsy.dmatrices(formula, data, eval_env=eval_env) labels = x.design_info.column_names return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False, labels=labels, priors=priors, vars=vars, family=family, diff --git a/pymc3/tests/test_glm.py b/pymc3/tests/test_glm.py index da1a8ef611..762ef9151c 100644 --- a/pymc3/tests/test_glm.py +++ b/pymc3/tests/test_glm.py @@ -2,6 +2,7 @@ from numpy.testing import assert_equal from .helpers import SeededTest +import pymc3 from pymc3 import Model, Uniform, Normal, find_MAP, Slice, sample from pymc3 import families, GLM, LinearComponent import pandas as pd @@ -117,3 +118,15 @@ def test_boolean_y(self): ) ) assert_equal(model.y.observations, model_bool.y.observations) + + def test_glm_formula_from_calling_scope(self): + """Formula can extract variables from the calling scope.""" + z = pd.Series([10, 20, 30]) + df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]}) + GLM.from_formula("y ~ x + z", df, family=pymc3.glm.families.Binomial()) + + def test_linear_component_formula_from_calling_scope(self): + """Formula can extract variables from the calling scope.""" + z = pd.Series([10, 20, 30]) + df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]}) + LinearComponent.from_formula("y ~ x + z", df)