Skip to content

Commit ed5f2d7

Browse files
committed
std::rand: optimise & document ziggurat.
Before: test rand::distributions::bench::rand_exp ... bench: 1399 ns/iter (+/- 124) = 571 MB/s test rand::distributions::bench::rand_normal ... bench: 1611 ns/iter (+/- 123) = 496 MB/s After: test rand::distributions::bench::rand_exp ... bench: 712 ns/iter (+/- 43) = 1123 MB/s test rand::distributions::bench::rand_normal ... bench: 1007 ns/iter (+/- 81) = 794 MB/s
1 parent e0eb128 commit ed5f2d7

File tree

1 file changed

+72
-8
lines changed

1 file changed

+72
-8
lines changed

src/libstd/rand/distributions.rs

Lines changed: 72 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,21 +63,48 @@ impl<Sup: Rand> IndependentSample<Sup> for RandSample<Sup> {
6363

6464
mod ziggurat_tables;
6565

66-
// inlining should mean there is no performance penalty for this
67-
#[inline]
66+
67+
/// Sample a random number using the Ziggurat method (specifically the
68+
/// ZIGNOR variant from Doornik 2005). Most of the arguments are
69+
/// directly from the paper:
70+
///
71+
/// * `rng`: source of randomness
72+
/// * `symmetric`: whether this is a symmetric distribution, or one-sided with P(x < 0) = 0.
73+
/// * `X`: the $x_i$ abscissae.
74+
/// * `F`: precomputed values of the PDF at the $x_i$, (i.e. $f(x_i)$)
75+
/// * `F_DIFF`: precomputed values of $f(x_i) - f(x_{i+1})$
76+
/// * `pdf`: the probability density function
77+
/// * `zero_case`: manual sampling from the tail when we chose the
78+
/// bottom box (i.e. i == 0)
79+
80+
// the perf improvement (25-50%) is definitely worth the extra code
81+
// size from force-inlining.
82+
#[inline(always)]
6883
fn ziggurat<R:Rng>(rng: &mut R,
69-
center_u: bool,
84+
symmetric: bool,
7085
X: ziggurat_tables::ZigTable,
7186
F: ziggurat_tables::ZigTable,
7287
F_DIFF: ziggurat_tables::ZigTable,
73-
pdf: &'static fn(f64) -> f64, // probability density function
88+
pdf: &'static fn(f64) -> f64,
7489
zero_case: &'static fn(&mut R, f64) -> f64) -> f64 {
90+
static SCALE: f64 = (1u64 << 53) as f64;
7591
loop {
76-
let u = if center_u {2.0 * rng.gen() - 1.0} else {rng.gen()};
77-
let i: uint = rng.gen::<uint>() & 0xff;
92+
// reimplement the f64 generation as an optimisation suggested
93+
// by the Doornik paper: we have a lot of precision-space
94+
// (i.e. there are 11 bits of the 64 of a u64 to use after
95+
// creating a f64), so we might as well reuse some to save
96+
// generating a whole extra random number. (Seems to be 15%
97+
// faster.)
98+
let bits: u64 = rng.gen();
99+
let i = (bits & 0xff) as uint;
100+
let f = (bits >> 11) as f64 / SCALE;
101+
102+
// u is either U(-1, 1) or U(0, 1) depending on if this is a
103+
// symmetric distribution or not.
104+
let u = if symmetric {2.0 * f - 1.0} else {f};
78105
let x = u * X[i];
79106

80-
let test_x = if center_u {num::abs(x)} else {x};
107+
let test_x = if symmetric {num::abs(x)} else {x};
81108

82109
// algebraically equivalent to |u| < X[i+1]/X[i] (or u < X[i+1]/X[i])
83110
if test_x < X[i + 1] {
@@ -87,7 +114,7 @@ fn ziggurat<R:Rng>(rng: &mut R,
87114
return zero_case(rng, u);
88115
}
89116
// algebraically equivalent to f1 + DRanU()*(f0 - f1) < 1
90-
if F[i+1] + F_DIFF[i+1] * rng.gen() < pdf(x) {
117+
if F[i + 1] + F_DIFF[i + 1] * rng.gen() < pdf(x) {
91118
return x;
92119
}
93120
}
@@ -318,3 +345,40 @@ mod tests {
318345
Exp::new(-10.0);
319346
}
320347
}
348+
349+
#[cfg(test)]
350+
mod bench {
351+
use extra::test::BenchHarness;
352+
use rand::*;
353+
use super::*;
354+
use iter::range;
355+
use option::{Some, None};
356+
use mem::size_of;
357+
358+
static N: u64 = 100;
359+
360+
#[bench]
361+
fn rand_normal(bh: &mut BenchHarness) {
362+
let mut rng = XorShiftRng::new();
363+
let mut normal = Normal::new(-2.71828, 3.14159);
364+
365+
do bh.iter {
366+
for _ in range(0, N) {
367+
normal.sample(&mut rng);
368+
}
369+
}
370+
bh.bytes = size_of::<f64>() as u64 * N;
371+
}
372+
#[bench]
373+
fn rand_exp(bh: &mut BenchHarness) {
374+
let mut rng = XorShiftRng::new();
375+
let mut exp = Exp::new(2.71828 * 3.14159);
376+
377+
do bh.iter {
378+
for _ in range(0, N) {
379+
exp.sample(&mut rng);
380+
}
381+
}
382+
bh.bytes = size_of::<f64>() as u64 * N;
383+
}
384+
}

0 commit comments

Comments
 (0)