Skip to content

Commit ddf5617

Browse files
committed
Add HyperGeometric distribution to discrete.py; Add tests
1 parent 6ef0b2d commit ddf5617

File tree

4 files changed

+127
-0
lines changed

4 files changed

+127
-0
lines changed

pymc3/distributions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
from .discrete import ZeroInflatedBinomial
6464
from .discrete import DiscreteUniform
6565
from .discrete import Geometric
66+
from .discrete import HyperGeometric
6667
from .discrete import Categorical
6768
from .discrete import OrderedLogistic
6869

pymc3/distributions/discrete.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
"ZeroInflatedNegativeBinomial",
3939
"DiscreteUniform",
4040
"Geometric",
41+
"HyperGeometric",
4142
"Categorical",
4243
"OrderedLogistic",
4344
]
@@ -809,6 +810,115 @@ def logp(self, value):
809810
return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
810811

811812

813+
class HyperGeometric(Discrete):
814+
R"""
815+
Discrete hypergeometric distribution.
816+
817+
The probability of :math:`x` successes in a sequence of :math:`n` bernoulli
818+
trials taken without replacement from a population of :math:`N` objects,
819+
containing :math:`k` good (or successful or Type I) objects.
820+
The pmf of this distribution is
821+
822+
.. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
823+
824+
.. plot::
825+
826+
import matplotlib.pyplot as plt
827+
import numpy as np
828+
import scipy.stats as st
829+
plt.style.use('seaborn-darkgrid')
830+
x = np.arange(1, 15)
831+
N = 50
832+
k = 10
833+
for n in [20, 25]:
834+
pmf = st.hypergeom.pmf(x, N, k, n)
835+
plt.plot(x, pmf, '-o', label='n = {}'.format(n))
836+
plt.plot(x, pmf, '-o', label='N = {}'.format(N))
837+
plt.plot(x, pmf, '-o', label='k = {}'.format(k))
838+
plt.xlabel('x', fontsize=12)
839+
plt.ylabel('f(x)', fontsize=12)
840+
plt.legend(loc=1)
841+
plt.show()
842+
843+
======== =============================
844+
845+
Support :math:`x in [max(0, n - \mathbb{N} + k), min(k, n)]`
846+
Mean :math:`\dfrac{nk}{N}`
847+
Variance :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
848+
======== =============================
849+
850+
Parameters
851+
----------
852+
N : integer
853+
Total size of the population
854+
n : integer
855+
Number of samples drawn from the population
856+
k : integer
857+
Number of successful individuals in the population
858+
"""
859+
860+
def __init__(self, N, k, n, *args, **kwargs):
861+
super().__init__(*args, **kwargs)
862+
self.N = intX(N)
863+
self.k = intX(k)
864+
self.n = intX(n)
865+
self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2)))
866+
867+
def random(self, point=None, size=None):
868+
r"""
869+
Draw random values from HyperGeometric distribution.
870+
871+
Parameters
872+
----------
873+
point : dict, optional
874+
Dict of variable values on which random values are to be
875+
conditioned (uses default point if not specified).
876+
size : int, optional
877+
Desired size of random sample (returns one sample if not
878+
specified).
879+
880+
Returns
881+
-------
882+
array
883+
"""
884+
N, n, k = draw_values([self.N, self.n, self.k], point=point, size=size)
885+
return generate_samples(
886+
np.random.hypergeometric, N, n, k, dist_shape=self.shape, size=size
887+
)
888+
889+
def logp(self, value):
890+
r"""
891+
Calculate log-probability of HyperGeometric distribution at specified value.
892+
893+
Parameters
894+
----------
895+
value : numeric
896+
Value(s) for which log-probability is calculated. If the log probabilities for multiple
897+
values are desired the values must be provided in a numpy array or theano tensor
898+
899+
Returns
900+
-------
901+
TensorVariable
902+
"""
903+
N = self.N
904+
k = self.k
905+
n = self.n
906+
tot, good = N, k
907+
bad = tot - good
908+
result = (
909+
betaln(good + 1, 1)
910+
+ betaln(bad + 1, 1)
911+
+ betaln(tot - n + 1, n + 1)
912+
- betaln(value + 1, good - value + 1)
913+
- betaln(n - value + 1, bad - n + value + 1)
914+
- betaln(tot + 1, 1)
915+
)
916+
lower = tt.switch(tt.gt(n - N + k, 0), n - N + k, 0)
917+
upper = tt.switch(tt.lt(k, n), k, n)
918+
nonint_value = (value != intX(tt.floor(value)))
919+
return bound(result, lower <= value, value <= upper, nonint_value)
920+
921+
812922
class DiscreteUniform(Discrete):
813923
R"""
814924
Discrete uniform distribution.

pymc3/tests/test_distributions.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
Rice,
7676
Kumaraswamy,
7777
Moyal,
78+
HyperGeometric,
7879
)
7980

8081
from ..distributions import continuous
@@ -790,6 +791,14 @@ def test_geometric(self):
790791
Geometric, Nat, {"p": Unit}, lambda value, p: np.log(sp.geom.pmf(value, p))
791792
)
792793

794+
def test_hypergeometric(self):
795+
self.pymc3_matches_scipy(
796+
HyperGeometric,
797+
Nat,
798+
{"N": NatSmall, "n": NatSmall, "k": NatSmall},
799+
lambda value, N, n, k: sp.hypergeom.logpmf(value, N, k, n),
800+
)
801+
793802
def test_negative_binomial(self):
794803
def test_fun(value, mu, alpha):
795804
return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha))

pymc3/tests/test_distributions_random.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,10 @@ class TestGeometric(BaseTestCases.BaseTestCase):
506506
distribution = pm.Geometric
507507
params = {"p": 0.5}
508508

509+
class TestHyperGeometric(BaseTestCases.BaseTestCase):
510+
distribution = pm.HyperGeometric
511+
params = {"N": 50, "n": 25, "k": 10}
512+
509513

510514
class TestMoyal(BaseTestCases.BaseTestCase):
511515
distribution = pm.Moyal
@@ -739,6 +743,9 @@ def ref_rand(size, alpha, mu):
739743
def test_geometric(self):
740744
pymc3_random_discrete(pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric)
741745

746+
def test_hypergeometric(self):
747+
pymc3_random_discrete(pm.HyperGeometric, {"N": Nat, "n": Nat, "k": Nat}, size=500, fails=50, ref_rand=nr.hypergeometric)
748+
742749
def test_discrete_uniform(self):
743750
def ref_rand(size, lower, upper):
744751
return st.randint.rvs(lower, upper + 1, size=size)

0 commit comments

Comments
 (0)