Refactor ZeroInflatedPoisson

ricardoV94 · ricardoV94 · commit ce252fac76b6 · 2021-05-12T13:45:04.000+02:00
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -1215,6 +1215,34 @@ def logp(value, c):
         )
 
 
+class ZeroInflatedPoissonRV(RandomVariable):
+    """Sampler for a Poisson variable whose draws are zeroed with probability ``1 - psi``."""
+
+    name = "zero_inflated_poisson"
+    ndim_supp = 0
+    ndims_params = [0, 0]
+    dtype = "int64"
+    _print_name = ("ZeroInflatedPois", "\\operatorname{ZeroInflatedPois}")
+
+    @classmethod
+    def rng_fn(cls, rng, psi, lam, size):
+        """Draw ``rng.poisson(lam)`` counts and keep each where a uniform draw is below ``psi``.
+
+        With ``size=None`` and array-valued parameters, the draw shape is taken from
+        the broadcast of ``psi`` and ``lam`` so that every element gets its own count
+        and its own uniform draw rather than sharing a single scalar draw.
+        """
+        import numpy as np  # local: this module's import block is not part of the patch
+
+        if size is None:
+            # An empty broadcast shape maps back to None, leaving scalar draws unchanged.
+            size = np.broadcast(psi, lam).shape or None
+        return rng.poisson(lam, size=size) * (rng.random(size=size) < psi)
+
+
+zero_inflated_poisson = ZeroInflatedPoissonRV()
+
+
 class ZeroInflatedPoisson(Discrete):
     R"""
     Zero-inflated Poisson log-likelihood.
@@ -1266,36 +1281,15 @@ class ZeroInflatedPoisson(Discrete):
         (theta >= 0).
     """
 
-    def __init__(self, psi, theta, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.theta = theta = at.as_tensor_variable(floatX(theta))
-        self.psi = at.as_tensor_variable(floatX(psi))
-        self.pois = Poisson.dist(theta)
-        self.mode = self.pois.mode
-
-    def random(self, point=None, size=None):
-        r"""
-        Draw random values from ZeroInflatedPoisson distribution.
-
-        Parameters
-        ----------
-        point: dict, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size: int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
+    rv_op = zero_inflated_poisson
 
-        Returns
-        -------
-        array
-        """
-        # theta, psi = draw_values([self.theta, self.psi], point=point, size=size)
-        # g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size)
-        # g, psi = broadcast_distribution_samples([g, psi], size=size)
-        # return g * (np.random.random(g.shape) < psi)
+    @classmethod
+    def dist(cls, psi, theta, *args, **kwargs):
+        # Both parameters go to the parent as floatX tensors, in (psi, theta) order.
+        params = [at.as_tensor_variable(floatX(param)) for param in (psi, theta)]
+        return super().dist(params, *args, **kwargs)
 
-    def logp(self, value):
+    def logp(value, psi, theta):
         r"""
         Calculate log-probability of ZeroInflatedPoisson distribution at specified value.
 
@@ -1309,18 +1303,22 @@ def logp(self, value):
         -------
         TensorVariable
         """
-        psi = self.psi
-        theta = self.theta
 
         logp_val = at.switch(
             at.gt(value, 0),
-            at.log(psi) + self.pois.logp(value),
+            at.log(psi) + Poisson.logp(value, theta),
             logaddexp(at.log1p(-psi), at.log(psi) - theta),
         )
 
-        return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta)
+        return bound(
+            logp_val,
+            0 <= value,
+            0 <= psi,
+            psi <= 1,
+            0 <= theta,
+        )
 
-    def logcdf(self, value):
+    def logcdf(value, psi, theta):
         """
         Compute the log of the cumulative distribution function for ZeroInflatedPoisson distribution
         at the specified value.
@@ -1335,13 +1333,13 @@ def logcdf(self, value):
         -------
         TensorVariable
         """
-        psi = self.psi
 
         return bound(
-            logaddexp(at.log1p(-psi), at.log(psi) + self.pois.logcdf(value)),
+            logaddexp(at.log1p(-psi), at.log(psi) + Poisson.logcdf(value, theta)),
             0 <= value,
             0 <= psi,
             psi <= 1,
+            0 <= theta,
         )
 
 
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -1618,8 +1618,7 @@ def test_bound_poisson(self):
     def test_constantdist(self):
         self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value))
 
-    # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
+    @pytest.mark.xfail(reason="Test has not been refactored")
     @pytest.mark.xfail(
         condition=(aesara.config.floatX == "float32"),
         reason="Fails on float32 due to inf issues",
@@ -1631,8 +1630,30 @@ def test_zeroinflatedpoisson_distribution(self):
             {"theta": Rplus, "psi": Unit},
         )
 
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
-    def test_zeroinflatedpoisson_logcdf(self):
+    def test_zeroinflatedpoisson(self):
+        def logp_fn(value, psi, theta):
+            if value == 0:  # zero mass: structural zeros (1 - psi) plus Poisson zeros
+                return np.log((1 - psi) + psi * sp.poisson.pmf(0, theta))
+            else:
+                return np.log(psi * sp.poisson.pmf(value, theta))
+
+        def logcdf_fn(value, psi, theta):  # log((1 - psi) + psi * Poisson CDF at value)
+            return np.log((1 - psi) + psi * sp.poisson.cdf(value, theta))
+
+        self.check_logp(
+            ZeroInflatedPoisson,
+            Nat,
+            {"psi": Unit, "theta": Rplus},
+            logp_fn,
+        )
+
+        self.check_logcdf(
+            ZeroInflatedPoisson,
+            Nat,
+            {"psi": Unit, "theta": Rplus},
+            logcdf_fn,
+        )
+
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedPoisson,
             Nat,
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -313,12 +313,6 @@ class TestLogitNormal(BaseTestCases.BaseTestCase):
     params = {"mu": 0.0, "sigma": 1.0}
 
 
-@pytest.mark.xfail(reason="This distribution has not been refactored for v4")
-class TestZeroInflatedPoisson(BaseTestCases.BaseTestCase):
-    distribution = pm.ZeroInflatedPoisson
-    params = {"theta": 1.0, "psi": 0.3}
-
-
 @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
 class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase):
     distribution = pm.ZeroInflatedNegativeBinomial
@@ -929,6 +923,37 @@ def constant_rng_fn(self, size, c):
     ]
 
 
+class TestZeroInflatedPoisson(BaseTestDistribution):
+    """Check ZeroInflatedPoisson parameters, draws and size against a NumPy reference sampler."""
+
+    def zero_inflated_poisson_rng_fn(self, size, psi, theta, poisson_rng_fct, random_rng_fct):
+        # Poisson counts are computed first, then masked by a uniform draw below psi.
+        counts = poisson_rng_fct(theta, size=size)
+        keep = random_rng_fct(size=size) < psi
+        return counts * keep
+
+    def seeded_zero_inflated_poisson_rng_fn(self):
+        # Each sampler is an unbound RandomState method tied to self.get_random_state().
+        poisson_draw = functools.partial(np.random.RandomState.poisson, self.get_random_state())
+        uniform_draw = functools.partial(np.random.RandomState.random, self.get_random_state())
+        return functools.partial(
+            self.zero_inflated_poisson_rng_fn,
+            poisson_rng_fct=poisson_draw,
+            random_rng_fct=uniform_draw,
+        )
+
+    pymc_dist = pm.ZeroInflatedPoisson
+    pymc_dist_params = {"psi": 0.9, "theta": 4.0}
+    expected_rv_op_params = {"psi": 0.9, "theta": 4.0}
+    reference_dist_params = {"psi": 0.9, "theta": 4.0}
+    reference_dist = seeded_zero_inflated_poisson_rng_fn
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
+
+
 class TestOrderedLogistic(BaseTestDistribution):
     pymc_dist = pm.OrderedLogistic
     pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])}