std::rand: optimise & document ziggurat.

huonw · huonw · commit ed5f2d7c7c67 · 2013-10-23T10:40:06.000+11:00
Before:

    test rand::distributions::bench::rand_exp ... bench: 1399 ns/iter (+/- 124) = 571 MB/s
    test rand::distributions::bench::rand_normal ... bench: 1611 ns/iter (+/- 123) = 496 MB/s

After:

    test rand::distributions::bench::rand_exp ... bench: 712 ns/iter (+/- 43) = 1123 MB/s
    test rand::distributions::bench::rand_normal ... bench: 1007 ns/iter (+/- 81) = 794 MB/s
diff --git a/src/libstd/rand/distributions.rs b/src/libstd/rand/distributions.rs
@@ -63,21 +63,48 @@ impl<Sup: Rand> IndependentSample<Sup> for RandSample<Sup> {
 
 mod ziggurat_tables;
 
-// inlining should mean there is no performance penalty for this
-#[inline]
+
+/// Sample a random number using the Ziggurat method (specifically the
+/// ZIGNOR variant from Doornik 2005). Most of the arguments are
+/// directly from the paper:
+///
+/// * `rng`: source of randomness
+/// * `symmetric`: whether this is a symmetric distribution, or one-sided with P(x < 0) = 0.
+/// * `X`: the $x_i$ abscissae.
+/// * `F`: precomputed values of the PDF at the $x_i$, (i.e. $f(x_i)$)
+/// * `F_DIFF`: precomputed values of $f(x_i) - f(x_{i+1})$
+/// * `pdf`: the probability density function
+/// * `zero_case`: manual sampling from the tail when we chose the
+///    bottom box (i.e. i == 0)
+
+// the perf improvement (25-50%) is definitely worth the extra code
+// size from force-inlining.
+#[inline(always)]
 fn ziggurat<R:Rng>(rng: &mut R,
-                   center_u: bool,
+                   symmetric: bool,
                    X: ziggurat_tables::ZigTable,
                    F: ziggurat_tables::ZigTable,
                    F_DIFF: ziggurat_tables::ZigTable,
-                   pdf: &'static fn(f64) -> f64, // probability density function
+                   pdf: &'static fn(f64) -> f64,
                    zero_case: &'static fn(&mut R, f64) -> f64) -> f64 {
+    static SCALE: f64 = (1u64 << 53) as f64;
     loop {
-        let u = if center_u {2.0 * rng.gen() - 1.0} else {rng.gen()};
-        let i: uint = rng.gen::<uint>() & 0xff;
+        // reimplement the f64 generation as an optimisation suggested
+        // by the Doornik paper: we have a lot of precision-space
+        // (i.e. there are 11 bits of the 64 of a u64 to use after
+        // creating a f64), so we might as well reuse some to save
+        // generating a whole extra random number. (Seems to be 15%
+        // faster.)
+        let bits: u64 = rng.gen();
+        let i = (bits & 0xff) as uint;
+        let f = (bits >> 11) as f64 / SCALE;
+
+        // u is either U(-1, 1) or U(0, 1) depending on if this is a
+        // symmetric distribution or not.
+        let u = if symmetric {2.0 * f - 1.0} else {f};
         let x = u * X[i];
 
-        let test_x = if center_u {num::abs(x)} else {x};
+        let test_x = if symmetric {num::abs(x)} else {x};
 
         // algebraically equivalent to |u| < X[i+1]/X[i] (or u < X[i+1]/X[i])
         if test_x < X[i + 1] {
@@ -87,7 +114,7 @@ fn ziggurat<R:Rng>(rng: &mut R,
             return zero_case(rng, u);
         }
         // algebraically equivalent to f1 + DRanU()*(f0 - f1) < 1
-        if F[i+1] + F_DIFF[i+1] * rng.gen() < pdf(x) {
+        if F[i + 1] + F_DIFF[i + 1] * rng.gen() < pdf(x) {
             return x;
         }
     }
@@ -318,3 +345,40 @@ mod tests {
         Exp::new(-10.0);
     }
 }
+
+#[cfg(test)]
+mod bench {
+    use extra::test::BenchHarness;
+    use rand::*;
+    use super::*;
+    use iter::range;
+    use option::{Some, None};
+    use mem::size_of;
+
+    static N: u64 = 100;
+
+    #[bench]
+    fn rand_normal(bh: &mut BenchHarness) {
+        let mut rng = XorShiftRng::new();
+        let mut normal = Normal::new(-2.71828, 3.14159);
+
+        do bh.iter {
+            for _ in range(0, N) {
+                normal.sample(&mut rng);
+            }
+        }
+        bh.bytes = size_of::<f64>() as u64 * N;
+    }
+    #[bench]
+    fn rand_exp(bh: &mut BenchHarness) {
+        let mut rng = XorShiftRng::new();
+        let mut exp = Exp::new(2.71828 * 3.14159);
+
+        do bh.iter {
+            for _ in range(0, N) {
+                exp.sample(&mut rng);
+            }
+        }
+        bh.bytes = size_of::<f64>() as u64 * N;
+    }
+}