
Commit a53e865

tomicapretto authored and ricardoV94 committed
Implement Hurdle distributions
1 parent 970db18 commit a53e865

4 files changed: +420 -1 lines changed


docs/source/api/distributions/mixture.rst

Lines changed: 4 additions & 0 deletions
@@ -11,3 +11,7 @@ Mixture
    ZeroInflatedBinomial
    ZeroInflatedNegativeBinomial
    ZeroInflatedPoisson
+   HurdlePoisson
+   HurdleNegativeBinomial
+   HurdleGamma
+   HurdleLogNormal

pymc/distributions/__init__.py

Lines changed: 8 additions & 0 deletions
@@ -74,6 +74,10 @@
     SymbolicRandomVariable,
 )
 from pymc.distributions.mixture import (
+    HurdleGamma,
+    HurdleLogNormal,
+    HurdleNegativeBinomial,
+    HurdlePoisson,
     Mixture,
     NormalMixture,
     ZeroInflatedBinomial,
@@ -195,4 +199,8 @@
     "Censored",
     "CAR",
     "PolyaGamma",
+    "HurdleGamma",
+    "HurdleLogNormal",
+    "HurdleNegativeBinomial",
+    "HurdlePoisson",
 ]
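
With these `__all__` additions, the hurdle classes become importable from the top-level `pymc` namespace, assuming the top-level package re-exports `pymc.distributions` as it does for the existing zero-inflated distributions. A minimal sketch of what that enables (illustrative only; the values are made up and not part of the commit):

import pymc as pm

# Unregistered hurdle-Poisson variable: `psi` is the probability of clearing
# the hurdle (producing a nonzero count), `mu` the rate of the truncated
# Poisson component.
y = pm.HurdlePoisson.dist(psi=0.7, mu=2.0)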

pymc/distributions/mixture.py

Lines changed: 244 additions & 1 deletion
@@ -22,7 +22,7 @@
 from pytensor.tensor.random.op import RandomVariable
 
 from pymc.distributions import transforms
-from pymc.distributions.continuous import Normal, get_tau_sigma
+from pymc.distributions.continuous import Gamma, LogNormal, Normal, get_tau_sigma
 from pymc.distributions.discrete import Binomial, NegativeBinomial, Poisson
 from pymc.distributions.dist_math import check_parameters
 from pymc.distributions.distribution import (
@@ -34,6 +34,7 @@
 )
 from pymc.distributions.shape_utils import _change_dist_size, change_dist_size
 from pymc.distributions.transforms import _default_transform
+from pymc.distributions.truncated import Truncated
 from pymc.logprob.abstract import _logcdf, _logcdf_helper, _logprob, _logprob_helper
 from pymc.logprob.transforms import IntervalTransform
 from pymc.logprob.utils import ignore_logprob
@@ -42,6 +43,10 @@
 from pymc.vartypes import continuous_types, discrete_types
 
 __all__ = [
+    "HurdleGamma",
+    "HurdleLogNormal",
+    "HurdleNegativeBinomial",
+    "HurdlePoisson",
     "Mixture",
     "NormalMixture",
     "ZeroInflatedBinomial",
@@ -794,3 +799,241 @@ def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
             nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
             **kwargs,
         )
+
+
+def _hurdle_mixture(*, name, nonzero_p, nonzero_dist, dtype, **kwargs):
+    """Helper function to create hurdle mixtures.
+
+    If name is `None`, this function returns an unregistered variable.
+
+    In hurdle models, the zeros come from a completely different process than the rest of the data.
+    In other words, the zeros are not inflated; they come from a different process.
+    """
+    if dtype == "float":
+        zero = 0.0
+        lower = np.finfo(pytensor.config.floatX).eps
+    elif dtype == "int":
+        zero = 0
+        lower = 1
+    else:
+        raise ValueError("dtype must be 'float' or 'int'")
+
+    nonzero_p = pt.as_tensor_variable(floatX(nonzero_p))
+    weights = pt.stack([1 - nonzero_p, nonzero_p], axis=-1)
+    comp_dists = [
+        DiracDelta.dist(zero),
+        Truncated.dist(nonzero_dist, lower=lower),
+    ]
+
+    if name is not None:
+        return Mixture(name, weights, comp_dists, **kwargs)
+    else:
+        return Mixture.dist(weights, comp_dists, **kwargs)
+
+
+class HurdlePoisson:
+    R"""
+    Hurdle Poisson log-likelihood.
+
+    The Poisson distribution is often used to model the number of events occurring
+    in a fixed period of time or space when the times or locations
+    at which events occur are independent.
+
+    The difference with ZeroInflatedPoisson is that the zeros are not inflated;
+    they come from a completely independent process.
+
+    The pmf of this distribution is
+
+    .. math::
+
+        f(x \mid \psi, \mu) =
+            \left\{
+                \begin{array}{l}
+                (1 - \psi) \ \text{if } x = 0 \\
+                \psi
+                \frac{\text{PoissonPDF}(x \mid \mu)}
+                    {1 - \text{PoissonCDF}(0 \mid \mu)} \ \text{if } x = 1, 2, 3, \ldots
+                \end{array}
+            \right.
+
+    Parameters
+    ----------
+    psi : tensor_like of float
+        Expected proportion of Poisson variates (0 < psi < 1)
+    mu : tensor_like of float
+        Expected number of occurrences (mu >= 0).
+    """
+
+    def __new__(cls, name, psi, mu, **kwargs):
+        return _hurdle_mixture(
+            name=name, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=mu), dtype="int", **kwargs
+        )
+
+    @classmethod
+    def dist(cls, psi, mu, **kwargs):
+        return _hurdle_mixture(
+            name=None, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=mu), dtype="int", **kwargs
+        )
+
+
+class HurdleNegativeBinomial:
+    R"""
+    Hurdle Negative Binomial log-likelihood.
+
+    The negative binomial distribution describes a Poisson random variable
+    whose rate parameter is gamma distributed.
+
+    The difference with ZeroInflatedNegativeBinomial is that the zeros are not inflated;
+    they come from a completely independent process.
+
+    The pmf of this distribution is
+
+    .. math::
+
+        f(x \mid \psi, \mu, \alpha) =
+            \left\{
+                \begin{array}{l}
+                (1 - \psi) \ \text{if } x = 0 \\
+                \psi
+                \frac{\text{NegativeBinomialPDF}(x \mid \mu, \alpha)}
+                    {1 - \text{NegativeBinomialCDF}(0 \mid \mu, \alpha)} \ \text{if } x = 1, 2, 3, \ldots
+                \end{array}
+            \right.
+
+    Parameters
+    ----------
+    psi : tensor_like of float
+        Expected proportion of Negative Binomial variates (0 < psi < 1)
+    alpha : tensor_like of float
+        Gamma distribution shape parameter (alpha > 0).
+    mu : tensor_like of float
+        Gamma distribution mean (mu > 0).
+    p : tensor_like of float
+        Alternative probability of success in each trial (0 < p < 1).
+    n : tensor_like of float
+        Alternative number of target success trials (n > 0).
+    """
+
+    def __new__(cls, name, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
+        return _hurdle_mixture(
+            name=name,
+            nonzero_p=psi,
+            nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
+            dtype="int",
+            **kwargs,
+        )
+
+    @classmethod
+    def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
+        return _hurdle_mixture(
+            name=None,
+            nonzero_p=psi,
+            nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
+            dtype="int",
+            **kwargs,
+        )
+
+
+class HurdleGamma:
+    R"""
+    Hurdle Gamma log-likelihood.
+
+    .. math::
+
+        f(x \mid \psi, \alpha, \beta) =
+            \left\{
+                \begin{array}{l}
+                (1 - \psi) \ \text{if } x = 0 \\
+                \psi
+                \frac{\text{GammaPDF}(x \mid \alpha, \beta)}
+                    {1 - \text{GammaCDF}(\epsilon \mid \alpha, \beta)} \ \text{if } x > 0
+                \end{array}
+            \right.
+
+    where :math:`\epsilon` is the machine precision.
+
+    Parameters
+    ----------
+    psi : tensor_like of float
+        Expected proportion of Gamma variates (0 < psi < 1)
+    alpha : tensor_like of float, optional
+        Shape parameter (alpha > 0).
+    beta : tensor_like of float, optional
+        Rate parameter (beta > 0).
+    mu : tensor_like of float, optional
+        Alternative shape parameter (mu > 0).
+    sigma : tensor_like of float, optional
+        Alternative scale parameter (sigma > 0).
+    """
+
+    def __new__(cls, name, psi, alpha=None, beta=None, mu=None, sigma=None, **kwargs):
+        return _hurdle_mixture(
+            name=name,
+            nonzero_p=psi,
+            nonzero_dist=Gamma.dist(alpha=alpha, beta=beta, mu=mu, sigma=sigma),
+            dtype="float",
+            **kwargs,
+        )
+
+    @classmethod
+    def dist(cls, psi, alpha=None, beta=None, mu=None, sigma=None, **kwargs):
+        return _hurdle_mixture(
+            name=None,
+            nonzero_p=psi,
+            nonzero_dist=Gamma.dist(alpha=alpha, beta=beta, mu=mu, sigma=sigma),
+            dtype="float",
+            **kwargs,
+        )
+
+
+class HurdleLogNormal:
+    R"""
+    Hurdle LogNormal log-likelihood.
+
+    .. math::
+
+        f(x \mid \psi, \mu, \sigma) =
+            \left\{
+                \begin{array}{l}
+                (1 - \psi) \ \text{if } x = 0 \\
+                \psi
+                \frac{\text{LogNormalPDF}(x \mid \mu, \sigma)}
+                    {1 - \text{LogNormalCDF}(\epsilon \mid \mu, \sigma)} \ \text{if } x > 0
+                \end{array}
+            \right.
+
+    where :math:`\epsilon` is the machine precision.
+
+    Parameters
+    ----------
+    psi : tensor_like of float
+        Expected proportion of LogNormal variates (0 < psi < 1)
+    mu : tensor_like of float, default 0
+        Location parameter.
+    sigma : tensor_like of float, optional
+        Standard deviation (sigma > 0). Only required if tau is not specified.
+        Defaults to 1.
+    tau : tensor_like of float, optional
+        Scale parameter (tau > 0). Only required if sigma is not specified.
+        Defaults to 1.
+    """
+
+    def __new__(cls, name, psi, mu=0, sigma=None, tau=None, **kwargs):
+        return _hurdle_mixture(
+            name=name,
+            nonzero_p=psi,
+            nonzero_dist=LogNormal.dist(mu=mu, sigma=sigma, tau=tau),
+            dtype="float",
+            **kwargs,
+        )
+
+    @classmethod
+    def dist(cls, psi, mu=0, sigma=None, tau=None, **kwargs):
+        return _hurdle_mixture(
+            name=None,
+            nonzero_p=psi,
+            nonzero_dist=LogNormal.dist(mu=mu, sigma=sigma, tau=tau),
+            dtype="float",
+            **kwargs,
+        )
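
As a usage sketch (not part of the commit; the data, priors, and variable names below are hypothetical), the new classes plug into a model like any other PyMC distribution, and the pmf given in the new `HurdlePoisson` docstring can be checked numerically with SciPy:

# Illustrative sketch only; values, priors, and data are made up.
import numpy as np
import pymc as pm
from scipy import stats

# Hurdle-Poisson pmf from the new docstring:
#   P(X = 0)         = 1 - psi
#   P(X = k), k >= 1 = psi * PoissonPDF(k | mu) / (1 - PoissonCDF(0 | mu))
psi, mu = 0.7, 2.0
k = np.arange(0, 10)
pmf = np.where(
    k == 0,
    1 - psi,
    psi * stats.poisson.pmf(k, mu) / (1 - stats.poisson.cdf(0, mu)),
)
# The mass over k = 0..9 should be close to 1 (only a tiny truncated tail remains).
assert np.isclose(pmf.sum(), 1.0, atol=1e-3)

# Fit the new distribution as an observed likelihood in a model.
counts = np.random.default_rng(0).choice(k, size=100, p=pmf / pmf.sum())
with pm.Model():
    psi_rv = pm.Beta("psi", 1, 1)       # probability of a nonzero count
    mu_rv = pm.Exponential("mu", 1.0)   # rate of the truncated Poisson part
    pm.HurdlePoisson("y", psi=psi_rv, mu=mu_rv, observed=counts)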
