Skip to content

Zero inflated binomial #2251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 2, 2017
Merged
2 changes: 1 addition & 1 deletion pymc3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .external import *
from .glm import *
from . import gp
from .math import logsumexp, logit, invlogit, expand_packed_triangular, probit, invprobit
from .math import logaddexp, logsumexp, logit, invlogit, expand_packed_triangular, probit, invprobit
from .model import *
from .stats import *
from .sampling import *
Expand Down
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from .discrete import Constant
from .discrete import ZeroInflatedPoisson
from .discrete import ZeroInflatedNegativeBinomial
from .discrete import ZeroInflatedBinomial
from .discrete import DiscreteUniform
from .discrete import Geometric
from .discrete import Categorical
Expand Down Expand Up @@ -106,6 +107,7 @@
'Constant',
'ZeroInflatedPoisson',
'ZeroInflatedNegativeBinomial',
'ZeroInflatedBinomial',
'DiscreteUniform',
'Geometric',
'Categorical',
Expand Down
120 changes: 104 additions & 16 deletions pymc3/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from .dist_math import bound, factln, binomln, betaln, logpow
from .distribution import Discrete, draw_values, generate_samples, reshape_sampled
from pymc3.math import tround
from ..math import logaddexp

__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull',
'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial',
'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial',
'DiscreteUniform', 'Geometric', 'Categorical']


Expand Down Expand Up @@ -593,7 +594,7 @@ class ZeroInflatedPoisson(Discrete):

.. math::

f(x \mid \theta, \psi) = \left\{ \begin{array}{l}
f(x \mid \psi, \theta) = \left\{ \begin{array}{l}
(1-\psi) + \psi e^{-\theta}, \text{if } x = 0 \\
\psi \frac{e^{-\theta}\theta^x}{x!}, \text{if } x=1,2,3,\ldots
\end{array} \right.
Expand All @@ -606,15 +607,14 @@ class ZeroInflatedPoisson(Discrete):

Parameters
----------
psi : float
Expected proportion of Poisson variates (0 < psi < 1)
theta : float
Expected number of occurrences during the given interval
(theta >= 0).
psi : float
Expected proportion of Poisson variates (0 < psi < 1)

"""

def __init__(self, theta, psi, *args, **kwargs):
def __init__(self, psi, theta, *args, **kwargs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isn't this a backward incompatible change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This harmonizes them with the convention in Mixture, which is where they will eventually end up. So, a break is coming in one place or the other.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then we should definitely put that in the release notes. And maybe also print a warning until 3.2?

super(ZeroInflatedPoisson, self).__init__(*args, **kwargs)
self.theta = theta = tt.as_tensor_variable(theta)
self.psi = psi = tt.as_tensor_variable(psi)
Expand All @@ -630,9 +630,17 @@ def random(self, point=None, size=None, repeat=None):
return reshape_sampled(sampled, size, self.shape)

def logp(self, value):
    """Log-probability of the zero-inflated Poisson at ``value``."""
    psi, theta = self.psi, self.theta

    # x > 0 can only come from the Poisson component; x = 0 mixes the
    # structural zero (1 - psi) with the Poisson mass at zero, exp(-theta).
    zero_case = logaddexp(tt.log1p(-psi), tt.log(psi) - theta)
    nonzero_case = tt.log(psi) + self.pois.logp(value)

    return bound(tt.switch(value > 0, nonzero_case, zero_case),
                 0 <= value,
                 0 <= psi, psi <= 1,
                 0 <= theta)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
Expand All @@ -644,6 +652,76 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(psi))


class ZeroInflatedBinomial(Discrete):
    R"""
    Zero-inflated Binomial log-likelihood.

    .. math::

        f(x \mid \psi, n, p) = \left\{ \begin{array}{l}
            (1-\psi) + \psi (1-p)^{n}, \text{if } x = 0 \\
            \psi {n \choose x} p^x (1-p)^{n-x}, \text{if } x=1,2,3,\ldots,n
            \end{array} \right.

    ========  ==========================
    Support   :math:`x \in \{0, 1, \ldots, n\}`
    Mean      :math:`\psi n p`
    Variance  :math:`\psi n p (1 - p) + n^2 p^2 \psi (1 - \psi)`
    ========  ==========================

    Parameters
    ----------
    psi : float
        Expected proportion of Binomial variates (0 < psi < 1)
    n : int
        Number of Bernoulli trials (n >= 0).
    p : float
        Probability of success in each trial (0 < p < 1).

    """

    def __init__(self, psi, n, p, *args, **kwargs):
        super(ZeroInflatedBinomial, self).__init__(*args, **kwargs)
        # Wrap parameters as tensor variables so they enter the graph.
        self.n = n = tt.as_tensor_variable(n)
        self.p = p = tt.as_tensor_variable(p)
        self.psi = psi = tt.as_tensor_variable(psi)
        # Underlying Binomial component, reused by logp's non-zero branch.
        self.bin = Binomial.dist(n, p)
        self.mode = self.bin.mode

def random(self, point=None, size=None, repeat=None):
    """Draw samples: Binomial variates, each zeroed out with probability 1 - psi."""
    n, p, psi = draw_values([self.n, self.p, self.psi], point=point)
    draws = generate_samples(stats.binom.rvs, n, p,
                             dist_shape=self.shape,
                             size=size)
    # Zero-inflation mask: keep each draw with probability psi.
    # NOTE(review): the squeeze mirrors the other ZI distributions; it passes
    # test_distribution_random, but confirm it broadcasts for all shapes.
    keep = np.random.random(np.squeeze(draws.shape)) < psi
    return reshape_sampled(draws * keep, size, self.shape)

def logp(self, value):
    """Log-probability of the zero-inflated Binomial at ``value``."""
    psi, n, p = self.psi, self.n, self.p

    # x = 0 mixes the structural zero (1 - psi) with the Binomial's own
    # zero mass (1 - p)^n, combined stably in log space.
    zero_case = logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p))
    nonzero_case = tt.log(psi) + self.bin.logp(value)

    return bound(tt.switch(value > 0, nonzero_case, zero_case),
                 0 <= value, value <= n,
                 0 <= psi, psi <= 1,
                 0 <= p, p <= 1)

def _repr_latex_(self, name=None, dist=None):
    """LaTeX representation of the distribution, for notebook display."""
    if dist is None:
        dist = self
    template = (r'${} \sim \text{{ZeroInflatedBinomial}}'
                r'(\mathit{{n}}={}, \mathit{{p}}={}, \mathit{{psi}}={})$')
    return template.format(name,
                           get_variable_name(dist.n),
                           get_variable_name(dist.p),
                           get_variable_name(dist.psi))


class ZeroInflatedNegativeBinomial(Discrete):
R"""
Zero-Inflated Negative binomial log-likelihood.
Expand All @@ -654,7 +732,7 @@ class ZeroInflatedNegativeBinomial(Discrete):

.. math::

f(x \mid \mu, \alpha, \psi) = \left\{ \begin{array}{l}
f(x \mid \psi, \mu, \alpha) = \left\{ \begin{array}{l}
(1-\psi) + \psi \left (\frac{\alpha}{\alpha+\mu} \right) ^\alpha, \text{if } x = 0 \\
\psi \frac{\Gamma(x+\alpha)}{x! \Gamma(\alpha)} \left (\frac{\alpha}{\mu+\alpha} \right)^\alpha \left( \frac{\mu}{\mu+\alpha} \right)^x, \text{if } x=1,2,3,\ldots
\end{array} \right.
Expand All @@ -667,15 +745,16 @@ class ZeroInflatedNegativeBinomial(Discrete):

Parameters
----------
psi : float
Expected proportion of NegativeBinomial variates (0 < psi < 1)
mu : float
Poisson distribution parameter (mu > 0).
alpha : float
Gamma distribution parameter (alpha > 0).
psi : float
Expected proportion of NegativeBinomial variates (0 < psi < 1)

"""

def __init__(self, mu, alpha, psi, *args, **kwargs):
def __init__(self, psi, mu, alpha, *args, **kwargs):
super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs)
self.mu = mu = tt.as_tensor_variable(mu)
self.alpha = alpha = tt.as_tensor_variable(alpha)
Expand All @@ -694,9 +773,18 @@ def random(self, point=None, size=None, repeat=None):
return reshape_sampled(sampled, size, self.shape)

def logp(self, value):
    """Log-probability of the zero-inflated negative binomial at ``value``."""
    psi, mu, alpha = self.psi, self.mu, self.alpha

    # NB mass at zero is (alpha / (alpha + mu))**alpha, taken in log space.
    nb_zero = alpha * (tt.log(alpha) - tt.log(alpha + mu))
    zero_case = logaddexp(tt.log1p(-psi), tt.log(psi) + nb_zero)
    nonzero_case = tt.log(psi) + self.nb.logp(value)

    return bound(tt.switch(value > 0, nonzero_case, zero_case),
                 0 <= value,
                 0 <= psi, psi <= 1,
                 mu > 0, alpha > 0)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
Expand Down
5 changes: 5 additions & 0 deletions pymc3/math.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ def logsumexp(x, axis=None):
x_max = tt.max(x, axis=axis, keepdims=True)
return tt.log(tt.sum(tt.exp(x - x_max), axis=axis, keepdims=True)) + x_max

def logaddexp(a, b):
    """Numerically stable, element-wise log(exp(a) + exp(b)).

    Factors the larger argument out of the sum so the remaining
    exponential has a non-positive argument and cannot overflow.
    """
    return tt.switch(a >= b,
                     a + tt.log1p(tt.exp(b - a)),
                     b + tt.log1p(tt.exp(a - b)))

def invlogit(x, eps=sys.float_info.epsilon):
return (1. - 2. * eps) / (1. + tt.exp(-x)) + eps
Expand Down
6 changes: 5 additions & 1 deletion pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
NegativeBinomial, Geometric, Exponential, ExGaussian, Normal,
Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform,
Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull, Gumbel,
Interpolated)
Interpolated, ZeroInflatedBinomial)
from ..distributions import continuous
from pymc3.theanof import floatX
from numpy import array, inf, log, exp
Expand Down Expand Up @@ -591,6 +591,10 @@ def test_zeroinflatednegativebinomial(self):
self.checkd(ZeroInflatedNegativeBinomial, Nat,
{'mu': Rplusbig, 'alpha': Rplusbig, 'psi': Unit})

def test_zeroinflatedbinomial(self):
    # Smoke-check ZeroInflatedBinomial logp over small trial counts and
    # unit-interval p / psi, mirroring the other zero-inflated tests above.
    self.checkd(ZeroInflatedBinomial, Nat,
                {'n': NatSmall, 'p': Unit, 'psi': Unit})

@pytest.mark.parametrize('n', [1, 2, 3])
def test_mvnormal(self, n):
self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n),
Expand Down
3 changes: 3 additions & 0 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase):
distribution = pm.ZeroInflatedNegativeBinomial
params = {'mu': 1., 'alpha': 1., 'psi': 0.3}

class TestZeroInflatedBinomial(BaseTestCases.BaseTestCase):
    # Runs the shared random-sampling harness against the new
    # ZeroInflatedBinomial distribution with fixed reference parameters.
    distribution = pm.ZeroInflatedBinomial
    params = {'n': 10, 'p': 0.6, 'psi': 0.3}

class TestDiscreteUniform(BaseTestCases.BaseTestCase):
distribution = pm.DiscreteUniform
Expand Down