Skip to content

Commit b9e4fcb

Browse files
Ryman authored and alexcrichton committed
shootout-mandelbrot: Precalc initial values & use SIMD in the main loop. +80-100%
1 parent 03f4853 commit b9e4fcb

File tree

1 file changed: +125 −61 lines changed

src/test/bench/shootout-mandelbrot.rs

Lines changed: 125 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -8,104 +8,168 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010
#![feature(macro_rules)]
11+
#![feature(simd)]
12+
#![allow(experimental)]
1113

1214
// ignore-pretty very bad with line comments
1315

1416
extern crate sync;
1517

1618
use std::io;
19+
use std::os;
20+
use std::unstable::simd::f64x2;
1721
use sync::Future;
22+
use sync::Arc;
1823

1924
static ITER: int = 50;
2025
static LIMIT: f64 = 2.0;
26+
static WORKERS: uint = 16;
2127

22-
macro_rules! core_loop(
23-
($pow:expr ~ $mask:expr: $ctx:ident, $b:ident) => (
24-
{
25-
let r = $ctx.r;
26-
let i = $ctx.i;
28+
#[inline(always)]
29+
fn mandelbrot<W: io::Writer>(w: uint, mut out: W) -> io::IoResult<()> {
30+
assert!(WORKERS % 2 == 0);
2731

28-
$ctx.r = r * r - i * i + $ctx.init_r;
29-
$ctx.i = 2.0 * r * i + $ctx.init_i;
32+
// Ensure w and h are multiples of 8.
33+
let w = (w + 7) / 8 * 8;
34+
let h = w;
35+
36+
let chunk_size = h / WORKERS;
3037

31-
if r * r + i * i > LIMIT * LIMIT {
32-
$b |= $pow;
33-
if $b == $mask { break; }
38+
// Account for remainders in workload division, e.g. 1000 / 16 = 62.5
39+
let first_chunk_size = if h % WORKERS != 0 {
40+
chunk_size + h % WORKERS
41+
} else {
42+
chunk_size
43+
};
44+
45+
// precalc values
46+
let inverse_w_doubled = 2.0 / w as f64;
47+
let inverse_h_doubled = 2.0 / h as f64;
48+
let v_inverses = f64x2(inverse_w_doubled, inverse_h_doubled);
49+
let v_consts = f64x2(1.5, 1.0);
50+
51+
// A lot of this code assumes this (so do other lang benchmarks)
52+
assert!(w == h);
53+
let mut precalc_r = Vec::with_capacity(w);
54+
let mut precalc_i = Vec::with_capacity(h);
55+
56+
let precalc_futures = Vec::from_fn(WORKERS, |i| {
57+
Future::spawn(proc () {
58+
let mut rs = Vec::with_capacity(w / WORKERS);
59+
let mut is = Vec::with_capacity(w / WORKERS);
60+
61+
let start = i * chunk_size;
62+
let end = if i == 0 {
63+
first_chunk_size
64+
} else {
65+
(i + 1) * chunk_size
66+
};
67+
68+
// This assumes w == h
69+
for x in range(start, end) {
70+
let xf = x as f64;
71+
let xy = f64x2(xf, xf);
72+
73+
let f64x2(r, i) = xy * v_inverses - v_consts;
74+
rs.push(r);
75+
is.push(i);
3476
}
35-
}
36-
);
37-
)
3877

39-
#[inline(always)]
40-
fn write_line(init_i: f64, vec_init_r: &[f64], res: &mut Vec<u8>) {
41-
struct Context { r: f64, i: f64, init_i: f64, init_r: f64 }
42-
impl Context {
43-
#[inline(always)]
44-
fn new(i: f64, r: f64) -> Context {
45-
Context { r: r, i: i, init_r: r, init_i: i }
46-
}
78+
(rs, is)
79+
})
80+
});
81+
82+
for res in precalc_futures.move_iter() {
83+
let (rs, is) = res.unwrap();
84+
precalc_r.push_all_move(rs);
85+
precalc_i.push_all_move(is);
86+
}
87+
88+
assert_eq!(precalc_r.len(), w);
89+
assert_eq!(precalc_i.len(), h);
90+
91+
let arc_init_r = Arc::new(precalc_r);
92+
let arc_init_i = Arc::new(precalc_i);
93+
94+
let data = Vec::from_fn(WORKERS, |i| {
95+
let vec_init_r = arc_init_r.clone();
96+
let vec_init_i = arc_init_i.clone();
97+
98+
Future::spawn(proc () {
99+
let mut res: Vec<u8> = Vec::with_capacity((chunk_size * w) / 8);
100+
let init_r_slice = vec_init_r.as_slice();
101+
for &init_i in vec_init_i.slice(i * chunk_size, (i + 1) * chunk_size).iter() {
102+
write_line(init_i, init_r_slice, &mut res);
103+
}
104+
105+
res
106+
})
107+
});
108+
109+
try!(writeln!(&mut out as &mut Writer, "P4\n{} {}", w, h));
110+
for res in data.move_iter() {
111+
try!(out.write(res.unwrap().as_slice()));
47112
}
113+
out.flush()
114+
}
115+
116+
fn write_line(init_i: f64, vec_init_r: &[f64], res: &mut Vec<u8>) {
117+
let v_init_i : f64x2 = f64x2(init_i, init_i);
118+
let v_2 : f64x2 = f64x2(2.0, 2.0);
119+
static LIMIT_SQUARED: f64 = LIMIT * LIMIT;
48120

49-
let mut cur_byte;
50-
let mut i;
51-
let mut bit_1;
52-
let mut bit_2;
53-
let mut b;
54121
for chunk_init_r in vec_init_r.chunks(8) {
55-
cur_byte = 0xff;
56-
i = 0;
122+
let mut cur_byte = 0xff;
123+
let mut i = 0;
57124

58125
while i < 8 {
59-
bit_1 = Context::new(init_i, chunk_init_r[i]);
60-
bit_2 = Context::new(init_i, chunk_init_r[i + 1]);
126+
let v_init_r = f64x2(chunk_init_r[i], chunk_init_r[i + 1]);
127+
let mut cur_r = v_init_r;
128+
let mut cur_i = v_init_i;
129+
let mut r_sq = v_init_r * v_init_r;
130+
let mut i_sq = v_init_i * v_init_i;
61131

62-
b = 0;
132+
let mut b = 0;
63133
for _ in range(0, ITER) {
64-
core_loop!(2 ~ 3: bit_1, b);
65-
core_loop!(1 ~ 3: bit_2, b);
134+
let r = cur_r;
135+
let i = cur_i;
136+
137+
cur_i = v_2 * r * i + v_init_i;
138+
cur_r = r_sq - i_sq + v_init_r;
139+
140+
let f64x2(bit1, bit2) = r_sq + i_sq;
141+
142+
if bit1 > LIMIT_SQUARED {
143+
b |= 2;
144+
if b == 3 { break; }
145+
}
146+
147+
if bit2 > LIMIT_SQUARED {
148+
b |= 1;
149+
if b == 3 { break; }
150+
}
151+
152+
r_sq = cur_r * cur_r;
153+
i_sq = cur_i * cur_i;
66154
}
67155

68156
cur_byte = (cur_byte << 2) + b;
69157
i += 2;
70158
}
71-
res.push(cur_byte^-1);
72-
}
73-
}
74-
75-
fn mandelbrot<W: io::Writer>(w: uint, mut out: W) -> io::IoResult<()> {
76-
// Ensure w and h are multiples of 8.
77-
let w = (w + 7) / 8 * 8;
78-
let h = w;
79-
let inverse_w_doubled = 2.0 / w as f64;
80-
let inverse_h_doubled = 2.0 / h as f64;
81-
let chunk_size = h / 16;
82-
83-
let data: Vec<Future<Vec<u8>>> = range(0u, 16).map(|i| Future::spawn(proc () {
84-
let vec_init_r = Vec::from_fn(w, |x| (x as f64) * inverse_w_doubled - 1.5);
85-
let mut res: Vec<u8> = Vec::with_capacity((chunk_size * w) / 8);
86-
for y in range(i * chunk_size, (i + 1) * chunk_size) {
87-
let init_i = (y as f64) * inverse_h_doubled - 1.0;
88-
write_line(init_i, vec_init_r.as_slice(), &mut res);
89-
}
90-
res
91-
})).collect();
92159

93-
try!(writeln!(&mut out as &mut Writer, "P4\n{} {}", w, h));
94-
for res in data.move_iter() {
95-
try!(out.write(res.unwrap().as_slice()));
160+
res.push(cur_byte^-1);
96161
}
97-
out.flush()
98162
}
99163

100164
fn main() {
101-
let args = std::os::args();
165+
let args = os::args();
102166
let args = args.as_slice();
103167
let res = if args.len() < 2 {
104168
println!("Test mode: do not dump the image because it's not utf8, \
105169
which interferes with the test runner.");
106-
mandelbrot(1000, std::io::util::NullWriter)
170+
mandelbrot(1000, io::util::NullWriter)
107171
} else {
108-
mandelbrot(from_str(args[1]).unwrap(), std::io::stdout())
172+
mandelbrot(from_str(args[1]).unwrap(), io::stdout())
109173
};
110174
res.unwrap();
111175
}

0 commit comments

Comments (0)