22 | 22 | from pytensor.tensor.random.op import RandomVariable
23 | 23 |
24 | 24 | from pymc.distributions import transforms
25 | | -from pymc.distributions.continuous import Normal, get_tau_sigma |
| 25 | +from pymc.distributions.continuous import Gamma, LogNormal, Normal, get_tau_sigma |
26 | 26 | from pymc.distributions.discrete import Binomial, NegativeBinomial, Poisson
27 | 27 | from pymc.distributions.dist_math import check_parameters
28 | 28 | from pymc.distributions.distribution import (
34 | 34 | )
35 | 35 | from pymc.distributions.shape_utils import _change_dist_size, change_dist_size
36 | 36 | from pymc.distributions.transforms import _default_transform
| 37 | +from pymc.distributions.truncated import Truncated |
37 | 38 | from pymc.logprob.abstract import _logcdf, _logcdf_helper, _logprob, _logprob_helper
38 | 39 | from pymc.logprob.transforms import IntervalTransform
39 | 40 | from pymc.logprob.utils import ignore_logprob
42 | 43 | from pymc.vartypes import continuous_types, discrete_types
43 | 44 |
44 | 45 | __all__ = [
| 46 | + "HurdleGamma", |
| 47 | + "HurdleLogNormal", |
| 48 | + "HurdleNegativeBinomial", |
| 49 | + "HurdlePoisson", |
45 | 50 | "Mixture",
46 | 51 | "NormalMixture",
47 | 52 | "ZeroInflatedBinomial",
@@ -794,3 +799,241 @@ def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
794 | 799 | nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
795 | 800 | **kwargs,
796 | 801 | )
| 802 | + |
| 803 | + |
| 804 | +def _hurdle_mixture(*, name, nonzero_p, nonzero_dist, dtype, **kwargs): |
| 805 | + """Helper function to create a hurdle mixture. |
| 806 | + |
| 807 | + If name is `None`, this function returns an unregistered variable. |
| 808 | + |
| 809 | + In hurdle models, the zeros come from a completely different process than the rest of the data; |
| 810 | + unlike in zero-inflated models, they are not extra zeros added on top of the base distribution. |
| 811 | + """ |
| 812 | + if dtype == "float": |
| 813 | + zero = 0.0 |
| 814 | + lower = np.finfo(pytensor.config.floatX).eps |
| 815 | + elif dtype == "int": |
| 816 | + zero = 0 |
| 817 | + lower = 1 |
| 818 | + else: |
| 819 | + raise ValueError("dtype must be 'float' or 'int'") |
| 820 | + |
| 821 | + nonzero_p = pt.as_tensor_variable(floatX(nonzero_p)) |
| 822 | + weights = pt.stack([1 - nonzero_p, nonzero_p], axis=-1) |
| 823 | + comp_dists = [ |
| 824 | + DiracDelta.dist(zero), |
| 825 | + Truncated.dist(nonzero_dist, lower=lower), |
| 826 | + ] |
| 827 | + |
| 828 | + if name is not None: |
| 829 | + return Mixture(name, weights, comp_dists, **kwargs) |
| 830 | + else: |
| 831 | + return Mixture.dist(weights, comp_dists, **kwargs) |
| 832 | + |
| 833 | + |
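For orientation, the helper builds the same object one would get by wiring the pieces together with public PyMC APIs: a two-component Mixture of a point mass at zero and the zero-truncated base distribution. A minimal sketch, assuming a PyMC version that exports Mixture, DiracDelta, and Truncated; psi_ and mu_ are made-up values, not part of the diff:

    import pymc as pm

    psi_, mu_ = 0.7, 3.0  # hypothetical fixed parameters, for illustration only
    with pm.Model():
        # weight (1 - psi) on an exact zero, weight psi on the zero-truncated base distribution
        hurdle = pm.Mixture(
            "hurdle",
            w=[1 - psi_, psi_],
            comp_dists=[
                pm.DiracDelta.dist(0),
                pm.Truncated.dist(pm.Poisson.dist(mu=mu_), lower=1),
            ],
        )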
| 834 | +class HurdlePoisson: |
| 835 | + R""" |
| 836 | + Hurdle Poisson log-likelihood. |
| 837 | +
| 838 | + The Poisson distribution is often used to model the number of events occurring |
| 839 | + in a fixed period of time or space when the times or locations |
| 840 | + at which events occur are independent. |
| 841 | +
| 842 | + Unlike with ZeroInflatedPoisson, the zeros here are not inflated: |
| 843 | + they come from a completely independent process. |
| 844 | +
| 845 | + The pmf of this distribution is |
| 846 | +
| 847 | + .. math:: |
| 848 | +
| 849 | + f(x \mid \psi, \mu) = |
| 850 | + \left\{ |
| 851 | + \begin{array}{l} |
| 852 | + (1 - \psi) \ \text{if } x = 0 \\ |
| 853 | + \psi |
| 854 | + \frac{\text{PoissonPDF}(x \mid \mu)} |
| 855 | + {1 - \text{PoissonCDF}(0 \mid \mu)} \ \text{if } x=1,2,3,\ldots |
| 856 | + \end{array} |
| 857 | + \right. |
| 858 | +
| 859 | +
| 860 | + Parameters |
| 861 | + ---------- |
| 862 | + psi : tensor_like of float |
| 863 | + Expected proportion of Poisson variates (0 < psi < 1) |
| 864 | + mu : tensor_like of float |
| 865 | + Expected number of occurrences (mu >= 0). |
| 866 | + """ |
| 867 | + |
| 868 | + def __new__(cls, name, psi, mu, **kwargs): |
| 869 | + return _hurdle_mixture( |
| 870 | + name=name, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=mu), dtype="int", **kwargs |
| 871 | + ) |
| 872 | + |
| 873 | + @classmethod |
| 874 | + def dist(cls, psi, mu, **kwargs): |
| 875 | + return _hurdle_mixture( |
| 876 | + name=None, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=mu), dtype="int", **kwargs |
| 877 | + ) |
| 878 | + |
| 879 | + |
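A usage sketch (the data and priors below are invented for illustration; the API is the one defined above, exposed at the pymc namespace like the other mixture distributions):

    import numpy as np
    import pymc as pm

    y = np.array([0, 0, 0, 2, 1, 4, 0, 3, 2, 0])  # toy counts with excess zeros

    with pm.Model():
        psi = pm.Beta("psi", 1.0, 1.0)  # probability of clearing the hurdle
        mu = pm.Gamma("mu", 2.0, 1.0)   # mean of the zero-truncated Poisson part
        pm.HurdlePoisson("y", psi=psi, mu=mu, observed=y)
        idata = pm.sample()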
| 880 | +class HurdleNegativeBinomial: |
| 881 | + R""" |
| 882 | + Hurdle Negative Binomial log-likelihood. |
| 883 | +
| 884 | + The negative binomial distribution describes a Poisson random variable |
| 885 | + whose rate parameter is gamma distributed. |
| 886 | +
| 887 | + Unlike with ZeroInflatedNegativeBinomial, the zeros here are not inflated: |
| 888 | + they come from a completely independent process. |
| 889 | +
| 890 | + The pmf of this distribution is |
| 891 | +
| 892 | + .. math:: |
| 893 | +
| 894 | + f(x \mid \psi, \mu, \alpha) = |
| 895 | + \left\{ |
| 896 | + \begin{array}{l} |
| 897 | + (1 - \psi) \ \text{if } x = 0 \\ |
| 898 | + \psi |
| 899 | + \frac{\text{NegativeBinomialPDF}(x \mid \mu, \alpha)} |
| 900 | + {1 - \text{NegativeBinomialCDF}(0 \mid \mu, \alpha)} \ \text{if } x=1,2,3,\ldots |
| 901 | + \end{array} |
| 902 | + \right. |
| 903 | +
| 904 | + Parameters |
| 905 | + ---------- |
| 906 | + psi : tensor_like of float |
| 907 | + Expected proportion of Negative Binomial variates (0 < psi < 1) |
| 908 | + alpha : tensor_like of float |
| 909 | + Gamma distribution shape parameter (alpha > 0). |
| 910 | + mu : tensor_like of float |
| 911 | + Gamma distribution mean (mu > 0). |
| 912 | + p : tensor_like of float |
| 913 | + Alternative probability of success in each trial (0 < p < 1). |
| 914 | + n : tensor_like of float |
| 915 | + Alternative number of target success trials (n > 0) |
| 916 | + """ |
| 917 | + |
| 918 | + def __new__(cls, name, psi, mu=None, alpha=None, p=None, n=None, **kwargs): |
| 919 | + return _hurdle_mixture( |
| 920 | + name=name, |
| 921 | + nonzero_p=psi, |
| 922 | + nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n), |
| 923 | + dtype="int", |
| 924 | + **kwargs, |
| 925 | + ) |
| 926 | + |
| 927 | + @classmethod |
| 928 | + def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs): |
| 929 | + return _hurdle_mixture( |
| 930 | + name=None, |
| 931 | + nonzero_p=psi, |
| 932 | + nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n), |
| 933 | + dtype="int", |
| 934 | + **kwargs, |
| 935 | + ) |
| 936 | + |
| 937 | + |
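The .dist() classmethod returns an unregistered variable that can be used outside a model, for example to simulate draws and check that the share of exact zeros is close to 1 - psi. A sketch with arbitrary parameter values, assuming sampling from the truncated NegativeBinomial component is supported in the targeted PyMC version:

    import numpy as np
    import pymc as pm

    x = pm.HurdleNegativeBinomial.dist(psi=0.3, mu=5.0, alpha=2.0)
    draws = pm.draw(x, draws=10_000, random_seed=42)
    print((draws == 0).mean())  # should be close to 1 - psi = 0.7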
| 938 | +class HurdleGamma: |
| 939 | + R""" |
| 940 | + Hurdle Gamma log-likelihood. |
| 941 | +
| 942 | + .. math:: |
| 943 | +
| 944 | + f(x \mid \psi, \alpha, \beta) = |
| 945 | + \left\{ |
| 946 | + \begin{array}{l} |
| 947 | + (1 - \psi) \ \text{if } x = 0 \\ |
| 948 | + \psi |
| 949 | + \frac{\text{GammaPDF}(x \mid \alpha, \beta)} |
| 950 | + {1 - \text{GammaCDF}(\epsilon \mid \alpha, \beta)} \ \text{if } x > 0 |
| 951 | + \end{array} |
| 952 | + \right. |
| 953 | +
| 954 | + where :math:`\epsilon` is the machine precision. |
| 955 | +
| 956 | + Parameters |
| 957 | + ---------- |
| 958 | + psi : tensor_like of float |
| 959 | + Expected proportion of Gamma variates (0 < psi < 1) |
| 960 | + alpha : tensor_like of float, optional |
| 961 | + Shape parameter (alpha > 0). |
| 962 | + beta : tensor_like of float, optional |
| 963 | + Rate parameter (beta > 0). |
| 964 | + mu : tensor_like of float, optional |
| 965 | + Alternative parameter: the distribution mean (mu > 0). |
| 966 | + sigma : tensor_like of float, optional |
| 967 | + Alternative parameter: the distribution standard deviation (sigma > 0). |
| 968 | + """ |
| 969 | + |
| 970 | + def __new__(cls, name, psi, alpha=None, beta=None, mu=None, sigma=None, **kwargs): |
| 971 | + return _hurdle_mixture( |
| 972 | + name=name, |
| 973 | + nonzero_p=psi, |
| 974 | + nonzero_dist=Gamma.dist(alpha=alpha, beta=beta, mu=mu, sigma=sigma), |
| 975 | + dtype="float", |
| 976 | + **kwargs, |
| 977 | + ) |
| 978 | + |
| 979 | + @classmethod |
| 980 | + def dist(cls, psi, alpha=None, beta=None, mu=None, sigma=None, **kwargs): |
| 981 | + return _hurdle_mixture( |
| 982 | + name=None, |
| 983 | + nonzero_p=psi, |
| 984 | + nonzero_dist=Gamma.dist(alpha=alpha, beta=beta, mu=mu, sigma=sigma), |
| 985 | + dtype="float", |
| 986 | + **kwargs, |
| 987 | + ) |
| 988 | + |
| 989 | + |
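Because the continuous variants truncate the nonzero component at machine epsilon rather than at 1, the mixture log-probability splits into a point mass at zero and a rescaled density above it. A quick check, with arbitrarily chosen parameters:

    import pymc as pm

    x = pm.HurdleGamma.dist(psi=0.8, alpha=2.0, beta=1.0)
    print(pm.logp(x, 0.0).eval())  # log(1 - psi) ≈ -1.609: the point mass at zero
    print(pm.logp(x, 1.5).eval())  # log(psi) + truncated-Gamma log-density at 1.5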
| 990 | +class HurdleLogNormal: |
| 991 | + R""" |
| 992 | + Hurdle LogNormal log-likelihood. |
| 993 | +
| 994 | + .. math:: |
| 995 | +
| 996 | + f(x \mid \psi, \mu, \sigma) = |
| 997 | + \left\{ |
| 998 | + \begin{array}{l} |
| 999 | + (1 - \psi) \ \text{if } x = 0 \\ |
| 1000 | + \psi |
| 1001 | + \frac{\text{LogNormalPDF}(x \mid \mu, \sigma)} |
| 1002 | + {1 - \text{LogNormalCDF}(\epsilon \mid \mu, \sigma)} \ \text{if } x > 0 |
| 1003 | + \end{array} |
| 1004 | + \right. |
| 1005 | +
| 1006 | + where :math:`\epsilon` is the machine precision. |
| 1007 | +
| 1008 | + Parameters |
| 1009 | + ---------- |
| 1010 | + psi : tensor_like of float |
| 1011 | + Expected proportion of LogNormal variates (0 < psi < 1) |
| 1012 | + mu : tensor_like of float, default 0 |
| 1013 | + Location parameter. |
| 1014 | + sigma : tensor_like of float, optional |
| 1015 | + Standard deviation of the underlying normal (sigma > 0); only required if tau is not specified. |
| 1016 | + Defaults to 1. |
| 1017 | + tau : tensor_like of float, optional |
| 1018 | + Precision of the underlying normal (tau > 0); only required if sigma is not specified. |
| 1019 | + Defaults to 1. |
| 1020 | + """ |
| 1021 | + |
| 1022 | + def __new__(cls, name, psi, mu=0, sigma=None, tau=None, **kwargs): |
| 1023 | + return _hurdle_mixture( |
| 1024 | + name=name, |
| 1025 | + nonzero_p=psi, |
| 1026 | + nonzero_dist=LogNormal.dist(mu=mu, sigma=sigma, tau=tau), |
| 1027 | + dtype="float", |
| 1028 | + **kwargs, |
| 1029 | + ) |
| 1030 | + |
| 1031 | + @classmethod |
| 1032 | + def dist(cls, psi, mu=0, sigma=None, tau=None, **kwargs): |
| 1033 | + return _hurdle_mixture( |
| 1034 | + name=None, |
| 1035 | + nonzero_p=psi, |
| 1036 | + nonzero_dist=LogNormal.dist(mu=mu, sigma=sigma, tau=tau), |
| 1037 | + dtype="float", |
| 1038 | + **kwargs, |
| 1039 | + ) |
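A back-of-the-envelope identity that helps when interpreting the parameters (not something computed by the code above): the DiracDelta component contributes nothing to the mean, so a hurdle variable Y with hurdle probability psi and nonzero part X has E[Y] = psi * E[X]. For HurdleLogNormal the truncation point is only machine epsilon, so E[Y] ≈ psi * exp(mu + sigma^2 / 2) to a very good approximation.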