|
8 | 8 | // option. This file may not be copied, modified, or distributed
|
9 | 9 | // except according to those terms.
|
10 | 10 | #![feature(macro_rules)]
|
| 11 | +#![feature(simd)] |
| 12 | +#![allow(experimental)] |
11 | 13 |
|
12 | 14 | // ignore-pretty very bad with line comments
|
13 | 15 |
|
14 | 16 | extern crate sync;
|
15 | 17 |
|
16 | 18 | use std::io;
|
| 19 | +use std::os; |
| 20 | +use std::unstable::simd::f64x2; |
17 | 21 | use sync::Future;
|
| 22 | +use sync::Arc; |
18 | 23 |
|
19 | 24 | static ITER: int = 50;
|
20 | 25 | static LIMIT: f64 = 2.0;
|
| 26 | +static WORKERS: uint = 16; |
21 | 27 |
|
22 |
| -macro_rules! core_loop( |
23 |
| - ($pow:expr ~ $mask:expr: $ctx:ident, $b:ident) => ( |
24 |
| - { |
25 |
| - let r = $ctx.r; |
26 |
| - let i = $ctx.i; |
| 28 | +#[inline(always)] |
| 29 | +fn mandelbrot<W: io::Writer>(w: uint, mut out: W) -> io::IoResult<()> { |
| 30 | + assert!(WORKERS % 2 == 0); |
27 | 31 |
|
28 |
| - $ctx.r = r * r - i * i + $ctx.init_r; |
29 |
| - $ctx.i = 2.0 * r * i + $ctx.init_i; |
| 32 | + // Ensure w and h are multiples of 8. |
| 33 | + let w = (w + 7) / 8 * 8; |
| 34 | + let h = w; |
| 35 | + |
| 36 | + let chunk_size = h / WORKERS; |
30 | 37 |
|
31 |
| - if r * r + i * i > LIMIT * LIMIT { |
32 |
| - $b |= $pow; |
33 |
| - if $b == $mask { break; } |
| 38 | + // Account for remainders in workload division, e.g. 1000 / 16 = 62.5 |
| 39 | + let first_chunk_size = if h % WORKERS != 0 { |
| 40 | + chunk_size + h % WORKERS |
| 41 | + } else { |
| 42 | + chunk_size |
| 43 | + }; |
| 44 | + |
| 45 | + // precalc values |
| 46 | + let inverse_w_doubled = 2.0 / w as f64; |
| 47 | + let inverse_h_doubled = 2.0 / h as f64; |
| 48 | + let v_inverses = f64x2(inverse_w_doubled, inverse_h_doubled); |
| 49 | + let v_consts = f64x2(1.5, 1.0); |
| 50 | + |
| 51 | + // A lot of this code assumes this (so do other lang benchmarks) |
| 52 | + assert!(w == h); |
| 53 | + let mut precalc_r = Vec::with_capacity(w); |
| 54 | + let mut precalc_i = Vec::with_capacity(h); |
| 55 | + |
| 56 | + let precalc_futures = Vec::from_fn(WORKERS, |i| { |
| 57 | + Future::spawn(proc () { |
| 58 | + let mut rs = Vec::with_capacity(w / WORKERS); |
| 59 | + let mut is = Vec::with_capacity(w / WORKERS); |
| 60 | + |
| 61 | + let start = i * chunk_size; |
| 62 | + let end = if i == 0 { |
| 63 | + first_chunk_size |
| 64 | + } else { |
| 65 | + (i + 1) * chunk_size |
| 66 | + }; |
| 67 | + |
| 68 | + // This assumes w == h |
| 69 | + for x in range(start, end) { |
| 70 | + let xf = x as f64; |
| 71 | + let xy = f64x2(xf, xf); |
| 72 | + |
| 73 | + let f64x2(r, i) = xy * v_inverses - v_consts; |
| 74 | + rs.push(r); |
| 75 | + is.push(i); |
34 | 76 | }
|
35 |
| - } |
36 |
| - ); |
37 |
| -) |
38 | 77 |
|
39 |
| -#[inline(always)] |
40 |
| -fn write_line(init_i: f64, vec_init_r: &[f64], res: &mut Vec<u8>) { |
41 |
| - struct Context { r: f64, i: f64, init_i: f64, init_r: f64 } |
42 |
| - impl Context { |
43 |
| - #[inline(always)] |
44 |
| - fn new(i: f64, r: f64) -> Context { |
45 |
| - Context { r: r, i: i, init_r: r, init_i: i } |
46 |
| - } |
| 78 | + (rs, is) |
| 79 | + }) |
| 80 | + }); |
| 81 | + |
| 82 | + for res in precalc_futures.move_iter() { |
| 83 | + let (rs, is) = res.unwrap(); |
| 84 | + precalc_r.push_all_move(rs); |
| 85 | + precalc_i.push_all_move(is); |
| 86 | + } |
| 87 | + |
| 88 | + assert_eq!(precalc_r.len(), w); |
| 89 | + assert_eq!(precalc_i.len(), h); |
| 90 | + |
| 91 | + let arc_init_r = Arc::new(precalc_r); |
| 92 | + let arc_init_i = Arc::new(precalc_i); |
| 93 | + |
| 94 | + let data = Vec::from_fn(WORKERS, |i| { |
| 95 | + let vec_init_r = arc_init_r.clone(); |
| 96 | + let vec_init_i = arc_init_i.clone(); |
| 97 | + |
| 98 | + Future::spawn(proc () { |
| 99 | + let mut res: Vec<u8> = Vec::with_capacity((chunk_size * w) / 8); |
| 100 | + let init_r_slice = vec_init_r.as_slice(); |
| 101 | + for &init_i in vec_init_i.slice(i * chunk_size, (i + 1) * chunk_size).iter() { |
| 102 | + write_line(init_i, init_r_slice, &mut res); |
| 103 | + } |
| 104 | + |
| 105 | + res |
| 106 | + }) |
| 107 | + }); |
| 108 | + |
| 109 | + try!(writeln!(&mut out as &mut Writer, "P4\n{} {}", w, h)); |
| 110 | + for res in data.move_iter() { |
| 111 | + try!(out.write(res.unwrap().as_slice())); |
47 | 112 | }
|
| 113 | + out.flush() |
| 114 | +} |
| 115 | + |
| 116 | +fn write_line(init_i: f64, vec_init_r: &[f64], res: &mut Vec<u8>) { |
| 117 | + let v_init_i : f64x2 = f64x2(init_i, init_i); |
| 118 | + let v_2 : f64x2 = f64x2(2.0, 2.0); |
| 119 | + static LIMIT_SQUARED: f64 = LIMIT * LIMIT; |
48 | 120 |
|
49 |
| - let mut cur_byte; |
50 |
| - let mut i; |
51 |
| - let mut bit_1; |
52 |
| - let mut bit_2; |
53 |
| - let mut b; |
54 | 121 | for chunk_init_r in vec_init_r.chunks(8) {
|
55 |
| - cur_byte = 0xff; |
56 |
| - i = 0; |
| 122 | + let mut cur_byte = 0xff; |
| 123 | + let mut i = 0; |
57 | 124 |
|
58 | 125 | while i < 8 {
|
59 |
| - bit_1 = Context::new(init_i, chunk_init_r[i]); |
60 |
| - bit_2 = Context::new(init_i, chunk_init_r[i + 1]); |
| 126 | + let v_init_r = f64x2(chunk_init_r[i], chunk_init_r[i + 1]); |
| 127 | + let mut cur_r = v_init_r; |
| 128 | + let mut cur_i = v_init_i; |
| 129 | + let mut r_sq = v_init_r * v_init_r; |
| 130 | + let mut i_sq = v_init_i * v_init_i; |
61 | 131 |
|
62 |
| - b = 0; |
| 132 | + let mut b = 0; |
63 | 133 | for _ in range(0, ITER) {
|
64 |
| - core_loop!(2 ~ 3: bit_1, b); |
65 |
| - core_loop!(1 ~ 3: bit_2, b); |
| 134 | + let r = cur_r; |
| 135 | + let i = cur_i; |
| 136 | + |
| 137 | + cur_i = v_2 * r * i + v_init_i; |
| 138 | + cur_r = r_sq - i_sq + v_init_r; |
| 139 | + |
| 140 | + let f64x2(bit1, bit2) = r_sq + i_sq; |
| 141 | + |
| 142 | + if bit1 > LIMIT_SQUARED { |
| 143 | + b |= 2; |
| 144 | + if b == 3 { break; } |
| 145 | + } |
| 146 | + |
| 147 | + if bit2 > LIMIT_SQUARED { |
| 148 | + b |= 1; |
| 149 | + if b == 3 { break; } |
| 150 | + } |
| 151 | + |
| 152 | + r_sq = cur_r * cur_r; |
| 153 | + i_sq = cur_i * cur_i; |
66 | 154 | }
|
67 | 155 |
|
68 | 156 | cur_byte = (cur_byte << 2) + b;
|
69 | 157 | i += 2;
|
70 | 158 | }
|
71 |
| - res.push(cur_byte^-1); |
72 |
| - } |
73 |
| -} |
74 |
| - |
75 |
| -fn mandelbrot<W: io::Writer>(w: uint, mut out: W) -> io::IoResult<()> { |
76 |
| - // Ensure w and h are multiples of 8. |
77 |
| - let w = (w + 7) / 8 * 8; |
78 |
| - let h = w; |
79 |
| - let inverse_w_doubled = 2.0 / w as f64; |
80 |
| - let inverse_h_doubled = 2.0 / h as f64; |
81 |
| - let chunk_size = h / 16; |
82 |
| - |
83 |
| - let data: Vec<Future<Vec<u8>>> = range(0u, 16).map(|i| Future::spawn(proc () { |
84 |
| - let vec_init_r = Vec::from_fn(w, |x| (x as f64) * inverse_w_doubled - 1.5); |
85 |
| - let mut res: Vec<u8> = Vec::with_capacity((chunk_size * w) / 8); |
86 |
| - for y in range(i * chunk_size, (i + 1) * chunk_size) { |
87 |
| - let init_i = (y as f64) * inverse_h_doubled - 1.0; |
88 |
| - write_line(init_i, vec_init_r.as_slice(), &mut res); |
89 |
| - } |
90 |
| - res |
91 |
| - })).collect(); |
92 | 159 |
|
93 |
| - try!(writeln!(&mut out as &mut Writer, "P4\n{} {}", w, h)); |
94 |
| - for res in data.move_iter() { |
95 |
| - try!(out.write(res.unwrap().as_slice())); |
| 160 | + res.push(cur_byte^-1); |
96 | 161 | }
|
97 |
| - out.flush() |
98 | 162 | }
|
99 | 163 |
|
100 | 164 | fn main() {
|
101 |
| - let args = std::os::args(); |
| 165 | + let args = os::args(); |
102 | 166 | let args = args.as_slice();
|
103 | 167 | let res = if args.len() < 2 {
|
104 | 168 | println!("Test mode: do not dump the image because it's not utf8, \
|
105 | 169 | which interferes with the test runner.");
|
106 |
| - mandelbrot(1000, std::io::util::NullWriter) |
| 170 | + mandelbrot(1000, io::util::NullWriter) |
107 | 171 | } else {
|
108 |
| - mandelbrot(from_str(args[1]).unwrap(), std::io::stdout()) |
| 172 | + mandelbrot(from_str(args[1]).unwrap(), io::stdout()) |
109 | 173 | };
|
110 | 174 | res.unwrap();
|
111 | 175 | }
|
0 commit comments