Merge pull request #2251 from pymc-devs/zero_inflated_binomial

fonnesbeck · web-flow · commit 0974ecc67120 · 2017-06-02T07:22:48.000-05:00
Zero inflated binomial
diff --git a/pymc3/__init__.py b/pymc3/__init__.py
@@ -6,7 +6,7 @@
 from .external import *
 from .glm import *
 from . import gp
-from .math import logsumexp, logit, invlogit, expand_packed_triangular, probit, invprobit
+from .math import logaddexp, logsumexp, logit, invlogit, expand_packed_triangular, probit, invprobit
 from .model import *
 from .stats import *
 from .sampling import *
diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
@@ -37,6 +37,7 @@
 from .discrete import Constant
 from .discrete import ZeroInflatedPoisson
 from .discrete import ZeroInflatedNegativeBinomial
+from .discrete import ZeroInflatedBinomial
 from .discrete import DiscreteUniform
 from .discrete import Geometric
 from .discrete import Categorical
@@ -106,6 +107,7 @@
            'Constant',
            'ZeroInflatedPoisson',
            'ZeroInflatedNegativeBinomial',
+           'ZeroInflatedBinomial',
            'DiscreteUniform',
            'Geometric',
            'Categorical',
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -8,10 +8,11 @@
 from .dist_math import bound, factln, binomln, betaln, logpow
 from .distribution import Discrete, draw_values, generate_samples, reshape_sampled
 from pymc3.math import tround
+from ..math import logaddexp
 
 __all__ = ['Binomial',  'BetaBinomial',  'Bernoulli',  'DiscreteWeibull',
            'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
-           'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial',
+           'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial',
            'DiscreteUniform', 'Geometric', 'Categorical']
 
 
@@ -593,7 +594,7 @@ class ZeroInflatedPoisson(Discrete):
 
     .. math::
 
-        f(x \mid \theta, \psi) = \left\{ \begin{array}{l}
+        f(x \mid \psi, \theta) = \left\{ \begin{array}{l}
             (1-\psi) + \psi e^{-\theta}, \text{if } x = 0 \\
             \psi \frac{e^{-\theta}\theta^x}{x!}, \text{if } x=1,2,3,\ldots
             \end{array} \right.
@@ -606,15 +607,14 @@ class ZeroInflatedPoisson(Discrete):
 
     Parameters
     ----------
+    psi : float
+        Expected proportion of Poisson variates (0 < psi < 1)
     theta : float
         Expected number of occurrences during the given interval
         (theta >= 0).
-    psi : float
-        Expected proportion of Poisson variates (0 < psi < 1)
-
     """
 
-    def __init__(self, theta, psi, *args, **kwargs):
+    def __init__(self, psi, theta, *args, **kwargs):
         super(ZeroInflatedPoisson, self).__init__(*args, **kwargs)
         self.theta = theta = tt.as_tensor_variable(theta)
         self.psi = psi = tt.as_tensor_variable(psi)
@@ -630,9 +630,17 @@ def random(self, point=None, size=None, repeat=None):
         return reshape_sampled(sampled, size, self.shape)
 
     def logp(self, value):
-        return tt.switch(value > 0,
-                         tt.log(self.psi) + self.pois.logp(value),
-                         tt.log((1. - self.psi) + self.psi * tt.exp(-self.theta)))
+        psi = self.psi
+        theta = self.theta
+
+        logp_val = tt.switch(value > 0,
+                     tt.log(psi) + self.pois.logp(value),
+                     logaddexp(tt.log1p(-psi), tt.log(psi) - theta))
+
+        return bound(logp_val,
+            0 <= value,
+            0 <= psi, psi <= 1,
+            0 <= theta)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -644,6 +652,76 @@ def _repr_latex_(self, name=None, dist=None):
                                                 get_variable_name(psi))
 
 
+class ZeroInflatedBinomial(Discrete):
+    R"""
+    Zero-inflated Binomial log-likelihood.
+
+    .. math::
+
+        f(x \mid \psi, n, p) = \left\{ \begin{array}{l}
+            (1-\psi) + \psi (1-p)^{n}, \text{if } x = 0 \\
+            \psi {n \choose x} p^x (1-p)^{n-x}, \text{if } x=1,2,3,\ldots,n
+            \end{array} \right.
+
+    ========  ==========================
+    Support   :math:`x \in \mathbb{N}_0`
+    Mean      :math:`(1 - \psi) n p`
+    Variance  :math:`(1-\psi) n p [1 - p(1 - \psi n)].`
+    ========  ==========================
+
+    Parameters
+    ----------
+    psi : float
+        Expected proportion of Binomial variates (0 < psi < 1)
+    n : int
+        Number of Bernoulli trials (n >= 0).
+    p : float
+        Probability of success in each trial (0 < p < 1).
+
+    """
+
+    def __init__(self, psi, n, p, *args, **kwargs):
+        super(ZeroInflatedBinomial, self).__init__(*args, **kwargs)
+        self.n = n = tt.as_tensor_variable(n)
+        self.p = p = tt.as_tensor_variable(p)
+        self.psi = psi = tt.as_tensor_variable(psi)
+        self.bin = Binomial.dist(n, p)
+        self.mode = self.bin.mode
+
+    def random(self, point=None, size=None, repeat=None):
+        n, p, psi = draw_values([self.n, self.p, self.psi], point=point)
+        g = generate_samples(stats.binom.rvs, n, p,
+                             dist_shape=self.shape,
+                             size=size)
+        sampled = g * (np.random.random(np.squeeze(g.shape)) < psi)
+        return reshape_sampled(sampled, size, self.shape)
+
+    def logp(self, value):
+        psi = self.psi
+        p = self.p
+        n = self.n
+
+        logp_val = tt.switch(value > 0,
+                 tt.log(psi) + self.bin.logp(value),
+                 logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)))
+
+        return bound(logp_val,
+            0 <= value, value <= n,
+            0 <= psi, psi <= 1,
+            0 <= p, p <= 1)
+
+    def _repr_latex_(self, name=None, dist=None):
+        if dist is None:
+            dist = self
+        n = dist.n
+        p = dist.p
+        psi = dist.psi
+        return r'${} \sim \text{{ZeroInflatedBinomial}}(\mathit{{n}}={}, \mathit{{p}}={}, \mathit{{psi}}={})$'.format(name,
+                                                get_variable_name(n),
+                                                get_variable_name(p),
+                                                get_variable_name(psi))
+
+
 class ZeroInflatedNegativeBinomial(Discrete):
     R"""
     Zero-Inflated Negative binomial log-likelihood.
@@ -654,7 +732,7 @@ class ZeroInflatedNegativeBinomial(Discrete):
 
     .. math::
 
-       f(x \mid \mu, \alpha, \psi) = \left\{ \begin{array}{l}
+       f(x \mid \psi, \mu, \alpha) = \left\{ \begin{array}{l}
             (1-\psi) + \psi \left (\frac{\alpha}{\alpha+\mu} \right) ^\alpha, \text{if } x = 0 \\
             \psi \frac{\Gamma(x+\alpha)}{x! \Gamma(\alpha)} \left (\frac{\alpha}{\mu+\alpha} \right)^\alpha \left( \frac{\mu}{\mu+\alpha} \right)^x, \text{if } x=1,2,3,\ldots
             \end{array} \right.
@@ -667,15 +745,16 @@ class ZeroInflatedNegativeBinomial(Discrete):
 
     Parameters
     ----------
+    psi : float
+        Expected proportion of NegativeBinomial variates (0 < psi < 1)
     mu : float
         Poission distribution parameter (mu > 0).
     alpha : float
         Gamma distribution parameter (alpha > 0).
-    psi : float
-        Expected proportion of NegativeBinomial variates (0 < psi < 1)
+
     """
 
-    def __init__(self, mu, alpha, psi, *args, **kwargs):
+    def __init__(self, psi, mu, alpha, *args, **kwargs):
         super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs)
         self.mu = mu = tt.as_tensor_variable(mu)
         self.alpha = alpha = tt.as_tensor_variable(alpha)
@@ -694,9 +773,18 @@ def random(self, point=None, size=None, repeat=None):
         return reshape_sampled(sampled, size, self.shape)
 
     def logp(self, value):
-        return tt.switch(value > 0,
-                         tt.log(self.psi) + self.nb.logp(value),
-                         tt.log((1. - self.psi) + self.psi * (self.alpha / (self.alpha + self.mu))**self.alpha))
+        alpha = self.alpha
+        mu = self.mu
+        psi = self.psi
+
+        logp_val = tt.switch(value > 0,
+                     tt.log(psi) + self.nb.logp(value),
+                     logaddexp(tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))))
+
+        return bound(logp_val,
+                    0 <= value,
+                    0 <= psi, psi <= 1,
+                    mu > 0, alpha > 0)
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
diff --git a/pymc3/math.py b/pymc3/math.py
@@ -31,6 +31,11 @@ def logsumexp(x, axis=None):
     x_max = tt.max(x, axis=axis, keepdims=True)
     return tt.log(tt.sum(tt.exp(x - x_max), axis=axis, keepdims=True)) + x_max
 
+def logaddexp(a, b):
+    diff = b - a
+    return tt.switch(diff > 0, 
+                    b + tt.log1p(tt.exp(-diff)), 
+                    a + tt.log1p(tt.exp(diff)))
 
 def invlogit(x, eps=sys.float_info.epsilon):
     return (1. - 2. * eps) / (1. + tt.exp(-x)) + eps
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -14,7 +14,7 @@
                              NegativeBinomial, Geometric, Exponential, ExGaussian, Normal,
                              Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform,
                              Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull, Gumbel,
-                             Interpolated)
+                             Interpolated, ZeroInflatedBinomial)
 from ..distributions import continuous
 from pymc3.theanof import floatX
 from numpy import array, inf, log, exp
@@ -591,6 +591,10 @@ def test_zeroinflatednegativebinomial(self):
         self.checkd(ZeroInflatedNegativeBinomial, Nat,
                     {'mu': Rplusbig, 'alpha': Rplusbig, 'psi': Unit})
 
+    def test_zeroinflatedbinomial(self):
+        self.checkd(ZeroInflatedBinomial, Nat,
+                    {'n': NatSmall, 'p': Unit, 'psi': Unit})
+
     @pytest.mark.parametrize('n', [1, 2, 3])
     def test_mvnormal(self, n):
         self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n),
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -338,6 +338,9 @@ class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase):
     distribution = pm.ZeroInflatedNegativeBinomial
     params = {'mu': 1., 'alpha': 1., 'psi': 0.3}
 
+class TestZeroInflatedBinomial(BaseTestCases.BaseTestCase):
+    distribution = pm.ZeroInflatedBinomial
+    params = {'n': 10, 'p': 0.6, 'psi': 0.3}
 
 class TestDiscreteUniform(BaseTestCases.BaseTestCase):
     distribution = pm.DiscreteUniform