|
38 | 38 | "ZeroInflatedNegativeBinomial",
|
39 | 39 | "DiscreteUniform",
|
40 | 40 | "Geometric",
|
| 41 | + "HyperGeometric", |
41 | 42 | "Categorical",
|
42 | 43 | "OrderedLogistic",
|
43 | 44 | ]
|
@@ -809,6 +810,115 @@ def logp(self, value):
|
809 | 810 | return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1)
|
810 | 811 |
|
811 | 812 |
|
class HyperGeometric(Discrete):
    R"""
    Discrete hypergeometric distribution.

    The probability of :math:`x` successes in a sequence of :math:`n` draws
    taken without replacement from a population of :math:`N` objects,
    containing :math:`k` good (or successful or Type I) objects.
    The pmf of this distribution is

    .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}

    .. plot::

        import matplotlib.pyplot as plt
        import numpy as np
        import scipy.stats as st
        plt.style.use('seaborn-darkgrid')
        x = np.arange(1, 15)
        N = 50
        k = 10
        for n in [20, 25]:
            pmf = st.hypergeom.pmf(x, N, k, n)
            plt.plot(x, pmf, '-o', label='N = {}, k = {}, n = {}'.format(N, k, n))
        plt.xlabel('x', fontsize=12)
        plt.ylabel('f(x)', fontsize=12)
        plt.legend(loc=1)
        plt.show()

    ========  =============================
    Support   :math:`x \in \left[\max(0, n - N + k), \min(k, n)\right]`
    Mean      :math:`\dfrac{nk}{N}`
    Variance  :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
    ========  =============================

    Parameters
    ----------
    N : integer
        Total size of the population
    k : integer
        Number of successful individuals in the population
    n : integer
        Number of samples drawn from the population
    """

    def __init__(self, N, k, n, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.N = intX(N)
        self.k = intX(k)
        self.n = intX(n)
        # Mode of the hypergeometric distribution: floor((n+1)(k+1)/(N+2)).
        self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2)))

    def random(self, point=None, size=None):
        r"""
        Draw random values from HyperGeometric distribution.

        Parameters
        ----------
        point : dict, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size : int, optional
            Desired size of random sample (returns one sample if not
            specified).

        Returns
        -------
        array
        """
        N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size)
        # np.random.hypergeometric expects (ngood, nbad, nsample), i.e. the
        # number of successes, the number of failures and the number of draws,
        # not (population size, draws, successes).
        return generate_samples(
            np.random.hypergeometric, k, N - k, n, dist_shape=self.shape, size=size
        )

    def logp(self, value):
        r"""
        Calculate log-probability of HyperGeometric distribution at specified value.

        Parameters
        ----------
        value : numeric
            Value(s) for which log-probability is calculated. If the log probabilities for multiple
            values are desired the values must be provided in a numpy array or theano tensor

        Returns
        -------
        TensorVariable
        """
        N = self.N
        k = self.k
        n = self.n
        bad = N - k
        # Each log-binomial coefficient is written with betaln using
        # log C(a, b) = betaln(a + 1, 1) - betaln(b + 1, a - b + 1), so that
        # result = log C(k, value) + log C(N - k, n - value) - log C(N, n).
        result = (
            betaln(k + 1, 1)
            + betaln(bad + 1, 1)
            + betaln(N - n + 1, n + 1)
            - betaln(value + 1, k - value + 1)
            - betaln(n - value + 1, bad - n + value + 1)
            - betaln(N + 1, 1)
        )
        # Support is [max(0, n - N + k), min(k, n)].
        lower = tt.switch(tt.gt(n - N + k, 0), n - N + k, 0)
        upper = tt.switch(tt.lt(k, n), k, n)
        # bound() keeps `result` only where every condition holds, so the
        # condition must be "value is an integer" (elementwise), not its negation.
        is_integer = tt.eq(value, tt.floor(value))
        return bound(result, lower <= value, value <= upper, is_integer)
| 920 | + |
| 921 | + |
812 | 922 | class DiscreteUniform(Discrete):
|
813 | 923 | R"""
|
814 | 924 | Discrete uniform distribution.
|
|
0 commit comments