Skip to content

Commit 65740ab

Browse files
authored
Documents arithmetic reduction semantics (#412)
* documents arithmetic reduction semantics
1 parent f750e2a commit 65740ab

File tree

5 files changed

+691
-60
lines changed

5 files changed

+691
-60
lines changed

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ set -ex
77
# Tests are all super fast anyway, and they fault often enough on travis that
88
# having only one thread increases debuggability to be worth it.
99
export RUST_TEST_THREADS=1
10-
#export RUST_BACKTRACE=1
10+
#export RUST_BACKTRACE=full
1111
#export RUST_TEST_NOCAPTURE=1
1212

1313
FEATURES="strict,$FEATURES"

coresimd/ppsv/api/arithmetic_reductions.rs

Lines changed: 76 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,58 +4,104 @@
44
macro_rules! impl_arithmetic_reductions {
55
($id:ident, $elem_ty:ident) => {
66
impl $id {
7-
/// Lane-wise addition of the vector elements.
7+
/// Horizontal sum of the vector elements.
88
///
9-
/// FIXME: document guarantees with respect to:
10-
/// * integers: overflow behavior
11-
/// * floats: order and NaNs
9+
/// The intrinsic performs a tree-reduction of the vector elements.
10+
/// That is, for an 8 element vector:
11+
///
12+
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
13+
///
14+
/// # Integer vectors
15+
///
16+
/// If an operation overflows it returns the mathematical result
17+
/// modulo `2^n` where `n` is the number of bits of the element type.
18+
///
19+
/// # Floating-point vectors
20+
///
21+
/// If any of the vector elements is `NaN` the reduction returns
22+
/// `NaN`.
1223
#[cfg(not(target_arch = "aarch64"))]
1324
#[inline]
14-
pub fn sum(self) -> $elem_ty {
25+
pub fn wrapping_sum(self) -> $elem_ty {
1526
use coresimd::simd_llvm::simd_reduce_add_ordered;
1627
unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
1728
}
18-
/// Lane-wise addition of the vector elements.
29+
/// Horizontal sum of the vector elements.
30+
///
31+
/// The intrinsic performs a tree-reduction of the vector elements.
32+
/// That is, for an 8 element vector:
33+
///
34+
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
35+
///
36+
/// # Integer vectors
37+
///
38+
/// If an operation overflows it returns the mathematical result
39+
/// modulo `2^n` where `n` is the number of bits of the element type.
40+
///
41+
/// # Floating-point vectors
1942
///
20-
/// FIXME: document guarantees with respect to:
21-
/// * integers: overflow behavior
22-
/// * floats: order and NaNs
43+
/// If any of the vector elements is `NaN` the reduction returns
44+
/// `NaN`.
2345
#[cfg(target_arch = "aarch64")]
2446
#[inline]
25-
pub fn sum(self) -> $elem_ty {
47+
pub fn wrapping_sum(self) -> $elem_ty {
2648
// FIXME: broken on AArch64
2749
// https://bugs.llvm.org/show_bug.cgi?id=36796
50+
use super::codegen::wrapping::Wrapping;
2851
let mut x = self.extract(0) as $elem_ty;
2952
for i in 1..$id::lanes() {
30-
x += self.extract(i) as $elem_ty;
53+
x = Wrapping::add(x, self.extract(i) as $elem_ty);
3154
}
3255
x
3356
}
3457

35-
/// Lane-wise multiplication of the vector elements.
58+
/// Horizontal product of the vector elements.
3659
///
37-
/// FIXME: document guarantees with respect to:
38-
/// * integers: overflow behavior
39-
/// * floats: order and NaNs
60+
/// The intrinsic performs a tree-reduction of the vector elements.
61+
/// That is, for an 8 element vector:
62+
///
63+
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
64+
///
65+
/// # Integer vectors
66+
///
67+
/// If an operation overflows it returns the mathematical result
68+
/// modulo `2^n` where `n` is the number of bits of the element type.
69+
///
70+
/// # Floating-point vectors
71+
///
72+
/// If any of the vector elements is `NaN` the reduction returns
73+
/// `NaN`.
4074
#[cfg(not(target_arch = "aarch64"))]
4175
#[inline]
42-
pub fn product(self) -> $elem_ty {
76+
pub fn wrapping_product(self) -> $elem_ty {
4377
use coresimd::simd_llvm::simd_reduce_mul_ordered;
4478
unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
4579
}
46-
/// Lane-wise multiplication of the vector elements.
80+
/// Horizontal product of the vector elements.
81+
///
82+
/// The intrinsic performs a tree-reduction of the vector elements.
83+
/// That is, for an 8 element vector:
84+
///
85+
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
86+
///
87+
/// # Integer vectors
88+
///
89+
/// If an operation overflows it returns the mathematical result
90+
/// modulo `2^n` where `n` is the number of bits of the element type.
91+
///
92+
/// # Floating-point vectors
4793
///
48-
/// FIXME: document guarantees with respect to:
49-
/// * integers: overflow behavior
50-
/// * floats: order and NaNs
94+
/// If any of the vector elements is `NaN` the reduction returns
95+
/// `NaN`.
5196
#[cfg(target_arch = "aarch64")]
5297
#[inline]
53-
pub fn product(self) -> $elem_ty {
98+
pub fn wrapping_product(self) -> $elem_ty {
5499
// FIXME: broken on AArch64
55100
// https://bugs.llvm.org/show_bug.cgi?id=36796
101+
use super::codegen::wrapping::Wrapping;
56102
let mut x = self.extract(0) as $elem_ty;
57103
for i in 1..$id::lanes() {
58-
x *= self.extract(i) as $elem_ty;
104+
x = Wrapping::mul(x, self.extract(i) as $elem_ty);
59105
}
60106
x
61107
}
@@ -78,25 +124,25 @@ macro_rules! test_arithmetic_reductions {
78124
}
79125

80126
#[test]
81-
fn sum() {
127+
fn wrapping_sum() {
82128
use coresimd::simd::$id;
83129
let v = $id::splat(0 as $elem_ty);
84-
assert_eq!(v.sum(), 0 as $elem_ty);
130+
assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
85131
let v = $id::splat(1 as $elem_ty);
86-
assert_eq!(v.sum(), $id::lanes() as $elem_ty);
132+
assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
87133
let v = alternating(2);
88134
assert_eq!(
89-
v.sum(),
135+
v.wrapping_sum(),
90136
($id::lanes() / 2 + $id::lanes()) as $elem_ty
91137
);
92138
}
93139
#[test]
94-
fn product() {
140+
fn wrapping_product() {
95141
use coresimd::simd::$id;
96142
let v = $id::splat(0 as $elem_ty);
97-
assert_eq!(v.product(), 0 as $elem_ty);
143+
assert_eq!(v.wrapping_product(), 0 as $elem_ty);
98144
let v = $id::splat(1 as $elem_ty);
99-
assert_eq!(v.product(), 1 as $elem_ty);
145+
assert_eq!(v.wrapping_product(), 1 as $elem_ty);
100146
let f = match $id::lanes() {
101147
64 => 16,
102148
32 => 8,
@@ -105,7 +151,7 @@ macro_rules! test_arithmetic_reductions {
105151
};
106152
let v = alternating(f);
107153
assert_eq!(
108-
v.product(),
154+
v.wrapping_product(),
109155
(2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
110156
);
111157
}

coresimd/ppsv/api/minmax_reductions.rs

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,19 @@
44
macro_rules! impl_minmax_reductions {
55
($id:ident, $elem_ty:ident) => {
66
impl $id {
7-
/// Largest vector value.
8-
///
9-
/// FIXME: document behavior for float vectors with NaNs.
10-
#[cfg(not(target_arch = "aarch64"))]
7+
/// Largest vector element value.
8+
#[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
119
#[inline]
12-
pub fn max(self) -> $elem_ty {
10+
pub fn max_element(self) -> $elem_ty {
1311
use coresimd::simd_llvm::simd_reduce_max;
1412
unsafe { simd_reduce_max(self) }
1513
}
16-
/// Largest vector value.
17-
///
18-
/// FIXME: document behavior for float vectors with NaNs.
19-
#[cfg(target_arch = "aarch64")]
14+
15+
/// Largest vector element value.
16+
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
2017
#[allow(unused_imports)]
2118
#[inline]
22-
pub fn max(self) -> $elem_ty {
19+
pub fn max_element(self) -> $elem_ty {
2320
// FIXME: broken on AArch64
2421
// https://bugs.llvm.org/show_bug.cgi?id=36796
2522
use cmp::Ord;
@@ -31,22 +28,19 @@ macro_rules! impl_minmax_reductions {
3128
x
3229
}
3330

34-
/// Smallest vector value.
35-
///
36-
/// FIXME: document behavior for float vectors with NaNs.
37-
#[cfg(not(target_arch = "aarch64"))]
31+
/// Smallest vector element value.
32+
#[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
3833
#[inline]
39-
pub fn min(self) -> $elem_ty {
34+
pub fn min_element(self) -> $elem_ty {
4035
use coresimd::simd_llvm::simd_reduce_min;
4136
unsafe { simd_reduce_min(self) }
4237
}
43-
/// Smallest vector value.
44-
///
45-
/// FIXME: document behavior for float vectors with NaNs.
46-
#[cfg(target_arch = "aarch64")]
38+
39+
/// Smallest vector element value.
40+
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
4741
#[allow(unused_imports)]
4842
#[inline]
49-
pub fn min(self) -> $elem_ty {
43+
pub fn min_element(self) -> $elem_ty {
5044
// FIXME: broken on AArch64
5145
// https://bugs.llvm.org/show_bug.cgi?id=36796
5246
use cmp::Ord;
@@ -65,29 +59,29 @@ macro_rules! impl_minmax_reductions {
6559
macro_rules! test_minmax_reductions {
6660
($id:ident, $elem_ty:ident) => {
6761
#[test]
68-
fn max() {
62+
fn max_element() {
6963
use coresimd::simd::$id;
7064
let v = $id::splat(0 as $elem_ty);
71-
assert_eq!(v.max(), 0 as $elem_ty);
65+
assert_eq!(v.max_element(), 0 as $elem_ty);
7266
let v = v.replace(1, 1 as $elem_ty);
73-
assert_eq!(v.max(), 1 as $elem_ty);
67+
assert_eq!(v.max_element(), 1 as $elem_ty);
7468
let v = v.replace(0, 2 as $elem_ty);
75-
assert_eq!(v.max(), 2 as $elem_ty);
69+
assert_eq!(v.max_element(), 2 as $elem_ty);
7670
}
7771

7872
#[test]
79-
fn min() {
73+
fn min_element() {
8074
use coresimd::simd::$id;
8175
let v = $id::splat(0 as $elem_ty);
82-
assert_eq!(v.min(), 0 as $elem_ty);
76+
assert_eq!(v.min_element(), 0 as $elem_ty);
8377
let v = v.replace(1, 1 as $elem_ty);
84-
assert_eq!(v.min(), 0 as $elem_ty);
78+
assert_eq!(v.min_element(), 0 as $elem_ty);
8579
let v = $id::splat(1 as $elem_ty);
8680
let v = v.replace(0, 2 as $elem_ty);
87-
assert_eq!(v.min(), 1 as $elem_ty);
81+
assert_eq!(v.min_element(), 1 as $elem_ty);
8882
let v = $id::splat(2 as $elem_ty);
8983
let v = v.replace(1, 1 as $elem_ty);
90-
assert_eq!(v.min(), 1 as $elem_ty);
84+
assert_eq!(v.min_element(), 1 as $elem_ty);
9185
}
9286
};
9387
}

coresimd/ppsv/mod.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,52 @@ impl<T> FromBits<T> for T {
7878
t
7979
}
8080
}
81+
82+
/// Workarounds for code generation issues.
83+
#[cfg(target_arch = "aarch64")]
84+
mod codegen {
85+
#[cfg(target_arch = "aarch64")]
86+
pub mod wrapping {
87+
pub trait Wrapping {
88+
fn add(self, other: Self) -> Self;
89+
fn mul(self, other: Self) -> Self;
90+
}
91+
92+
macro_rules! int_impl {
93+
($id:ident) => {
94+
impl Wrapping for $id {
95+
fn add(self, other: Self) -> Self {
96+
self.wrapping_add(other)
97+
}
98+
fn mul(self, other: Self) -> Self {
99+
self.wrapping_mul(other)
100+
}
101+
}
102+
};
103+
}
104+
int_impl!(i8);
105+
int_impl!(i16);
106+
int_impl!(i32);
107+
int_impl!(i64);
108+
int_impl!(u8);
109+
int_impl!(u16);
110+
int_impl!(u32);
111+
int_impl!(u64);
112+
113+
macro_rules! float_impl {
114+
($id:ident) => {
115+
impl Wrapping for $id {
116+
fn add(self, other: Self) -> Self {
117+
self + other
118+
}
119+
fn mul(self, other: Self) -> Self {
120+
self * other
121+
}
122+
}
123+
};
124+
}
125+
float_impl!(f32);
126+
float_impl!(f64);
127+
}
128+
129+
}

0 commit comments

Comments
 (0)