Refactor ZeroInflatedBinomial

ricardoV94 · ricardoV94 · commit 7a7405f5a623 · 2021-05-12T13:45:04.000+02:00
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -1343,6 +1343,21 @@ def logcdf(value, psi, theta):
         )
 
 
+class ZeroInflatedBinomialRV(RandomVariable):
+    name = "zero_inflated_binomial"
+    ndim_supp = 0
+    ndims_params = [0, 0, 0]  # presumably one entry each for psi, n, p -- confirm
+    dtype = "int64"
+    _print_name = ("ZeroInflatedBinom", "\\operatorname{ZeroInflatedBinom}")
+
+    @classmethod  # NOTE(review): if size is None, rng.random yields one scalar mask -- confirm
+    def rng_fn(cls, rng, psi, n, p, size):  # binomial draw, zeroed when uniform draw >= psi
+        return rng.binomial(n=n, p=p, size=size) * (rng.random(size=size) < psi)
+
+
+zero_inflated_binomial = ZeroInflatedBinomialRV()  # instance used as ZeroInflatedBinomial.rv_op
+
+
 class ZeroInflatedBinomial(Discrete):
     R"""
     Zero-inflated Binomial log-likelihood.
@@ -1395,37 +1410,16 @@ class ZeroInflatedBinomial(Discrete):
 
     """
 
-    def __init__(self, psi, n, p, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.n = n = at.as_tensor_variable(intX(n))
-        self.p = p = at.as_tensor_variable(floatX(p))
-        self.psi = psi = at.as_tensor_variable(floatX(psi))
-        self.bin = Binomial.dist(n, p)
-        self.mode = self.bin.mode
-
-    def random(self, point=None, size=None):
-        r"""
-        Draw random values from ZeroInflatedBinomial distribution.
+    rv_op = zero_inflated_binomial
 
-        Parameters
-        ----------
-        point: dict, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size: int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
-
-        Returns
-        -------
-        array
-        """
-        # n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size)
-        # g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size)
-        # g, psi = broadcast_distribution_samples([g, psi], size=size)
-        # return g * (np.random.random(g.shape) < psi)
+    @classmethod
+    def dist(cls, psi, n, p, *args, **kwargs):
+        # Tensor inputs for the op: psi and p go through floatX, n through intX.
+        psi, p = (at.as_tensor_variable(floatX(prob)) for prob in (psi, p))
+        n = at.as_tensor_variable(intX(n))
+        return super().dist([psi, n, p], *args, **kwargs)
 
-    def logp(self, value):
+    def logp(value, psi, n, p):
         r"""
         Calculate log-probability of ZeroInflatedBinomial distribution at specified value.
 
@@ -1439,19 +1433,24 @@ def logp(self, value):
         -------
         TensorVariable
         """
-        psi = self.psi
-        p = self.p
-        n = self.n
 
         logp_val = at.switch(
             at.gt(value, 0),
-            at.log(psi) + self.bin.logp(value),
+            at.log(psi) + Binomial.logp(value, n, p),
             logaddexp(at.log1p(-psi), at.log(psi) + n * at.log1p(-p)),
         )
 
-        return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1)
+        return bound(
+            logp_val,
+            0 <= value,
+            value <= n,
+            0 <= psi,
+            psi <= 1,
+            0 <= p,
+            p <= 1,
+        )
 
-    def logcdf(self, value):
+    def logcdf(value, psi, n, p):
         """
         Compute the log of the cumulative distribution function for ZeroInflatedBinomial distribution
         at the specified value.
@@ -1465,19 +1464,21 @@ def logcdf(self, value):
         -------
         TensorVariable
         """
+
         # logcdf can only handle scalar values due to limitation in Binomial.logcdf
         if np.ndim(value):
             raise TypeError(
                 f"ZeroInflatedBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
             )
 
-        psi = self.psi
-
         return bound(
-            logaddexp(at.log1p(-psi), at.log(psi) + self.bin.logcdf(value)),
+            logaddexp(at.log1p(-psi), at.log(psi) + Binomial.logcdf(value, n, p)),
             0 <= value,
+            value <= n,
             0 <= psi,
             psi <= 1,
+            0 <= p,
+            p <= 1,
         )
 
 
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -1682,17 +1682,39 @@ def test_zeroinflatednegativebinomial_logcdf(self):
             n_samples=10,
         )
 
-    # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
+    @pytest.mark.xfail(reason="Test not refactored yet")
     def test_zeroinflatedbinomial_distribution(self):
         self.checkd(
             ZeroInflatedBinomial,
             Nat,
             {"n": NatSmall, "p": Unit, "psi": Unit},
         )
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
-    def test_zeroinflatedbinomial_logcdf(self):
+    def test_zeroinflatedbinomial(self):
+        def logp_fn(value, psi, n, p):
+            if value == 0:  # a zero is either structural (1 - psi) or a binomial zero
+                return np.log((1 - psi) + psi * sp.binom.pmf(0, n, p))
+            else:
+                return np.log(psi * sp.binom.pmf(value, n, p))
+
+        def logcdf_fn(value, psi, n, p):  # log of (1 - psi) plus psi times the binomial CDF
+            return np.log(psi * sp.binom.cdf(value, n, p) + (1 - psi))
+
+        self.check_logp(
+            ZeroInflatedBinomial,
+            Nat,
+            {"psi": Unit, "n": NatSmall, "p": Unit},
+            logp_fn,
+        )
+
+        self.check_logcdf(
+            ZeroInflatedBinomial,
+            Nat,
+            {"psi": Unit, "n": NatSmall, "p": Unit},
+            logcdf_fn,
+            n_samples=10,
+        )
+
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedBinomial,
             Nat,
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -954,6 +954,37 @@ def seeded_zero_inflated_poisson_rng_fn(self):
     ]
 
 
+class TestZeroInflatedBinomial(BaseTestDistribution):
+    def zero_inflated_binomial_rng_fn(self, size, psi, n, p, binomial_rng_fct, random_rng_fct):
+        return binomial_rng_fct(n, p, size=size) * (random_rng_fct(size=size) < psi)
+
+    def seeded_zero_inflated_binomial_rng_fn(self):  # used as reference_dist below
+        binomial_rng_fct = functools.partial(
+            getattr(np.random.RandomState, "binomial"), self.get_random_state()
+        )
+
+        random_rng_fct = functools.partial(
+            getattr(np.random.RandomState, "random"), self.get_random_state()
+        )
+
+        return functools.partial(
+            self.zero_inflated_binomial_rng_fn,
+            binomial_rng_fct=binomial_rng_fct,
+            random_rng_fct=random_rng_fct,
+        )
+
+    pymc_dist = pm.ZeroInflatedBinomial
+    pymc_dist_params = {"psi": 0.9, "n": 12, "p": 0.7}
+    expected_rv_op_params = {"psi": 0.9, "n": 12, "p": 0.7}
+    reference_dist_params = {"psi": 0.9, "n": 12, "p": 0.7}
+    reference_dist = seeded_zero_inflated_binomial_rng_fn
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
+
+
 class TestOrderedLogistic(BaseTestDistribution):
     pymc_dist = pm.OrderedLogistic
     pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])}