Skip to content

Commit ad714f8

Browse files
authored
Merge pull request #563 from rust-ndarray/integrate-rayon
Integrate ndarray-parallel and make rayon an optional feature
2 parents 1cf7963 + b677c77 commit ad714f8

File tree

14 files changed

+1030
-6
lines changed

14 files changed

+1030
-6
lines changed

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ num-traits = "0.2"
3131
num-complex = "0.2"
3232
itertools = { version = "0.7.0", default-features = false }
3333

34+
rayon = { version = "1.0.3", optional = true }
35+
3436
# Use via the `blas` crate feature!
3537
cblas-sys = { version = "0.1.4", optional = true, default-features = false }
3638
blas-src = { version = "0.2.0", optional = true, default-features = false }
@@ -57,7 +59,7 @@ test-blas-openblas-sys = ["blas"]
5759
test = ["test-blas-openblas-sys"]
5860

5961
# This feature is used for docs
60-
docs = ["serde-1"]
62+
docs = ["serde-1", "rayon"]
6163

6264
[profile.release]
6365
[profile.bench]

README.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ your `Cargo.toml`.
5252
- Optional, compatible with Rust stable
5353
- Enables serialization support for serde 1.0
5454

55+
- ``rayon``
56+
57+
- Optional, compatible with Rust stable
58+
- Enables parallel iterators, parallelized methods and ``par_azip!``.
59+
5560
- ``blas``
5661

5762
- Optional and experimental, compatible with Rust stable

benches/par_rayon.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#![cfg(feature="rayon")]
2+
#![feature(test)]
3+
4+
extern crate rayon;
5+
6+
extern crate ndarray;
7+
extern crate itertools;
8+
9+
use ndarray::prelude::*;
10+
use ndarray::parallel::prelude::*;
11+
12+
extern crate test;
13+
use test::Bencher;
14+
15+
use ndarray::Zip;
16+
17+
const EXP_N: usize = 256;
18+
const ADDN: usize = 512;
19+
20+
use std::cmp::max;
21+
22+
fn set_threads() {
23+
// Consider setting a fixed number of threads here, for example to avoid
24+
// oversubscribing on hyperthreaded cores.
25+
// let n = 4;
26+
// let _ = rayon::ThreadPoolBuilder::new().num_threads(n).build_global();
27+
}
28+
29+
#[bench]
30+
fn map_exp_regular(bench: &mut Bencher)
31+
{
32+
let mut a = Array2::<f64>::zeros((EXP_N, EXP_N));
33+
a.swap_axes(0, 1);
34+
bench.iter(|| {
35+
a.mapv_inplace(|x| x.exp());
36+
});
37+
}
38+
39+
#[bench]
40+
fn rayon_exp_regular(bench: &mut Bencher)
41+
{
42+
set_threads();
43+
let mut a = Array2::<f64>::zeros((EXP_N, EXP_N));
44+
a.swap_axes(0, 1);
45+
bench.iter(|| {
46+
a.view_mut().into_par_iter().for_each(|x| *x = x.exp());
47+
});
48+
}
49+
50+
const FASTEXP: usize = EXP_N;
51+
52+
#[inline]
53+
fn fastexp(x: f64) -> f64 {
54+
let x = 1. + x/1024.;
55+
x.powi(1024)
56+
}
57+
58+
#[bench]
59+
fn map_fastexp_regular(bench: &mut Bencher)
60+
{
61+
let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
62+
bench.iter(|| {
63+
a.mapv_inplace(|x| fastexp(x))
64+
});
65+
}
66+
67+
#[bench]
68+
fn rayon_fastexp_regular(bench: &mut Bencher)
69+
{
70+
set_threads();
71+
let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
72+
bench.iter(|| {
73+
a.view_mut().into_par_iter().for_each(|x| *x = fastexp(*x));
74+
});
75+
}
76+
77+
#[bench]
78+
fn map_fastexp_cut(bench: &mut Bencher)
79+
{
80+
let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
81+
let mut a = a.slice_mut(s![.., ..-1]);
82+
bench.iter(|| {
83+
a.mapv_inplace(|x| fastexp(x))
84+
});
85+
}
86+
87+
#[bench]
88+
fn rayon_fastexp_cut(bench: &mut Bencher)
89+
{
90+
set_threads();
91+
let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
92+
let mut a = a.slice_mut(s![.., ..-1]);
93+
bench.iter(|| {
94+
a.view_mut().into_par_iter().for_each(|x| *x = fastexp(*x));
95+
});
96+
}
97+
98+
#[bench]
99+
fn map_fastexp_by_axis(bench: &mut Bencher)
100+
{
101+
let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
102+
bench.iter(|| {
103+
for mut sheet in a.axis_iter_mut(Axis(0)) {
104+
sheet.mapv_inplace(fastexp)
105+
}
106+
});
107+
}
108+
109+
#[bench]
110+
fn rayon_fastexp_by_axis(bench: &mut Bencher)
111+
{
112+
set_threads();
113+
let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
114+
bench.iter(|| {
115+
a.axis_iter_mut(Axis(0)).into_par_iter()
116+
.for_each(|mut sheet| sheet.mapv_inplace(fastexp));
117+
});
118+
}
119+
120+
#[bench]
121+
fn rayon_fastexp_zip(bench: &mut Bencher)
122+
{
123+
set_threads();
124+
let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
125+
bench.iter(|| {
126+
Zip::from(&mut a).into_par_iter().for_each(|(elt, )| *elt = fastexp(*elt));
127+
});
128+
}
129+
130+
#[bench]
131+
fn add(bench: &mut Bencher)
132+
{
133+
let mut a = Array2::<f64>::zeros((ADDN, ADDN));
134+
let b = Array2::<f64>::zeros((ADDN, ADDN));
135+
let c = Array2::<f64>::zeros((ADDN, ADDN));
136+
let d = Array2::<f64>::zeros((ADDN, ADDN));
137+
bench.iter(|| {
138+
azip!(mut a, b, c, d in {
139+
*a += b.exp() + c.exp() + d.exp();
140+
});
141+
});
142+
}
143+
144+
#[bench]
145+
fn rayon_add(bench: &mut Bencher)
146+
{
147+
set_threads();
148+
let mut a = Array2::<f64>::zeros((ADDN, ADDN));
149+
let b = Array2::<f64>::zeros((ADDN, ADDN));
150+
let c = Array2::<f64>::zeros((ADDN, ADDN));
151+
let d = Array2::<f64>::zeros((ADDN, ADDN));
152+
bench.iter(|| {
153+
par_azip!(mut a, b, c, d in {
154+
*a += b.exp() + c.exp() + d.exp();
155+
});
156+
});
157+
}

scripts/all-tests.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ cargo test --verbose --no-default-features
1111
cargo test --release --verbose --no-default-features
1212
cargo build --verbose --features "$FEATURES"
1313
cargo test --verbose --features "$FEATURES"
14-
cargo test --manifest-path=parallel/Cargo.toml --verbose
1514
cargo test --manifest-path=serialization-tests/Cargo.toml --verbose
1615
cargo test --manifest-path=blas-tests/Cargo.toml --verbose
1716
CARGO_TARGET_DIR=target/ cargo test --manifest-path=numeric-tests/Cargo.toml --verbose

src/lib.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,8 @@
5555
//! needs matching memory layout to be efficient (with some exceptions).
5656
//! + Efficient floating point matrix multiplication even for very large
5757
//! matrices; can optionally use BLAS to improve it further.
58-
//! + See also the [`ndarray-parallel`] crate for integration with rayon.
5958
//! - **Requires Rust 1.30**
6059
//!
61-
//! [`ndarray-parallel`]: https://docs.rs/ndarray-parallel
62-
//!
6360
//! ## Crate Feature Flags
6461
//!
6562
//! The following crate feature flags are available. They are configured in your
@@ -68,6 +65,9 @@
6865
//! - `serde-1`
6966
//! - Optional, compatible with Rust stable
7067
//! - Enables serialization support for serde 1.0
68+
//! - `rayon`
69+
//! - Optional, compatible with Rust stable
70+
//! - Enables parallel iterators, parallelized methods and [`par_azip!`].
7171
//! - `blas`
7272
//! - Optional and experimental, compatible with Rust stable
7373
//! - Enable transparent BLAS support for matrix multiplication.
@@ -87,6 +87,9 @@
8787
#[cfg(feature = "serde-1")]
8888
extern crate serde;
8989

90+
#[cfg(feature="rayon")]
91+
extern crate rayon;
92+
9093
#[cfg(feature="blas")]
9194
extern crate cblas_sys;
9295
#[cfg(feature="blas")]
@@ -1333,6 +1336,10 @@ impl<A, S, D> ArrayBase<S, D>
13331336
}
13341337

13351338

1339+
// parallel methods
1340+
#[cfg(feature="rayon")]
1341+
pub mod parallel;
1342+
13361343
mod impl_1d;
13371344
mod impl_2d;
13381345
mod impl_dyn;

src/parallel/impl_par_methods.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
2+
use {
3+
Dimension,
4+
NdProducer,
5+
Zip,
6+
ArrayBase,
7+
DataMut,
8+
};
9+
10+
use parallel::prelude::*;
11+
12+
13+
/// # Parallel methods
14+
///
15+
/// These methods require crate feature `rayon`.
16+
impl<A, S, D> ArrayBase<S, D>
17+
where S: DataMut<Elem=A>,
18+
D: Dimension,
19+
A: Send + Sync,
20+
{
21+
/// Parallel version of `map_inplace`.
22+
///
23+
/// Modify the array in place by calling `f` by mutable reference on each element.
24+
///
25+
/// Elements are visited in arbitrary order.
26+
pub fn par_map_inplace<F>(&mut self, f: F)
27+
where F: Fn(&mut A) + Sync + Send
28+
{
29+
self.view_mut().into_par_iter().for_each(f)
30+
}
31+
32+
/// Parallel version of `mapv_inplace`.
33+
///
34+
/// Modify the array in place by calling `f` by **v**alue on each element.
35+
/// The array is updated with the new values.
36+
///
37+
/// Elements are visited in arbitrary order.
38+
pub fn par_mapv_inplace<F>(&mut self, f: F)
39+
where F: Fn(A) -> A + Sync + Send,
40+
A: Clone,
41+
{
42+
self.view_mut().into_par_iter()
43+
.for_each(move |x| *x = f(x.clone()))
44+
}
45+
}
46+
47+
48+
49+
50+
// Zip
51+
52+
macro_rules! zip_impl {
53+
($([$($p:ident)*],)+) => {
54+
$(
55+
#[allow(non_snake_case)]
56+
impl<D, $($p),*> Zip<($($p,)*), D>
57+
where $($p::Item : Send , )*
58+
$($p : Send , )*
59+
D: Dimension,
60+
$($p: NdProducer<Dim=D> ,)*
61+
{
62+
/// The `par_apply` method for `Zip`.
63+
///
64+
/// This is a shorthand for using `.into_par_iter().for_each()` on
65+
/// `Zip`.
66+
///
67+
/// Requires crate feature `rayon`.
68+
pub fn par_apply<F>(self, function: F)
69+
where F: Fn($($p::Item),*) + Sync + Send
70+
{
71+
self.into_par_iter().for_each(move |($($p,)*)| function($($p),*))
72+
}
73+
}
74+
)+
75+
}
76+
}
77+
78+
zip_impl!{
79+
[P1],
80+
[P1 P2],
81+
[P1 P2 P3],
82+
[P1 P2 P3 P4],
83+
[P1 P2 P3 P4 P5],
84+
[P1 P2 P3 P4 P5 P6],
85+
}

src/parallel/into_impls.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
use {Array, ArcArray, Dimension, ArrayView, ArrayViewMut};
2+
3+
use super::prelude::IntoParallelIterator;
4+
use super::Parallel;
5+
6+
/// Requires crate feature `rayon`.
7+
impl<'a, A, D> IntoParallelIterator for &'a Array<A, D>
8+
where D: Dimension,
9+
A: Sync
10+
{
11+
type Item = &'a A;
12+
type Iter = Parallel<ArrayView<'a, A, D>>;
13+
fn into_par_iter(self) -> Self::Iter {
14+
self.view().into_par_iter()
15+
}
16+
}
17+
18+
// This is allowed: goes through `.view()`
19+
/// Requires crate feature `rayon`.
20+
impl<'a, A, D> IntoParallelIterator for &'a ArcArray<A, D>
21+
where D: Dimension,
22+
A: Sync
23+
{
24+
type Item = &'a A;
25+
type Iter = Parallel<ArrayView<'a, A, D>>;
26+
fn into_par_iter(self) -> Self::Iter {
27+
self.view().into_par_iter()
28+
}
29+
}
30+
31+
/// Requires crate feature `rayon`.
32+
impl<'a, A, D> IntoParallelIterator for &'a mut Array<A, D>
33+
where D: Dimension,
34+
A: Sync + Send
35+
{
36+
type Item = &'a mut A;
37+
type Iter = Parallel<ArrayViewMut<'a, A, D>>;
38+
fn into_par_iter(self) -> Self::Iter {
39+
self.view_mut().into_par_iter()
40+
}
41+
}
42+
43+
// This is allowed: goes through `.view_mut()`, which is unique access
44+
/// Requires crate feature `rayon`.
45+
impl<'a, A, D> IntoParallelIterator for &'a mut ArcArray<A, D>
46+
where D: Dimension,
47+
A: Sync + Send + Clone,
48+
{
49+
type Item = &'a mut A;
50+
type Iter = Parallel<ArrayViewMut<'a, A, D>>;
51+
fn into_par_iter(self) -> Self::Iter {
52+
self.view_mut().into_par_iter()
53+
}
54+
}

0 commit comments

Comments
 (0)