Skip to content

Commit 6a4ec75

Browse files
committed
documents arithmetic reduction semantics
1 parent cd5b544 commit 6a4ec75

File tree

4 files changed

+622
-56
lines changed

4 files changed

+622
-56
lines changed

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ set -ex
77
# Tests are all super fast anyway, and they fault often enough on travis that
88
# having only one thread increases debuggability to be worth it.
99
export RUST_TEST_THREADS=1
10-
#export RUST_BACKTRACE=1
10+
export RUST_BACKTRACE=1
1111
#export RUST_TEST_NOCAPTURE=1
1212

1313
FEATURES="strict,$FEATURES"

coresimd/ppsv/api/arithmetic_reductions.rs

Lines changed: 74 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,58 +4,102 @@
44
macro_rules! impl_arithmetic_reductions {
55
($id:ident, $elem_ty:ident) => {
66
impl $id {
7-
/// Lane-wise addition of the vector elements.
7+
/// Horizontal sum of the vector elements.
88
///
9-
/// FIXME: document guarantees with respect to:
10-
/// * integers: overflow behavior
11-
/// * floats: order and NaNs
9+
/// The intrinsic performs a tree-reduction of the vector elements.
10+
/// That is, for an 8 element vector:
11+
///
12+
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
13+
///
14+
/// # Integer vectors
15+
///
16+
/// If an operation overflows it returns the mathematical result
17+
/// modulo `2^n` where `n` is the number of bits of the element type.
18+
///
19+
/// # Floating-point vectors
20+
///
21+
/// If one of the vector elements is `NaN` the reduction returns
22+
/// `NaN`.
1223
#[cfg(not(target_arch = "aarch64"))]
1324
#[inline]
14-
pub fn sum(self) -> $elem_ty {
25+
pub fn wrapping_sum(self) -> $elem_ty {
1526
use coresimd::simd_llvm::simd_reduce_add_ordered;
1627
unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
1728
}
18-
/// Lane-wise addition of the vector elements.
29+
/// Horizontal sum of the vector elements.
30+
///
31+
/// The intrinsic performs a tree-reduction of the vector elements.
32+
/// That is, for an 8 element vector:
33+
///
34+
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
35+
///
36+
/// # Integer vectors
37+
///
38+
/// If an operation overflows it returns the mathematical result
39+
/// modulo `2^n` where `n` is the number of bits of the element type.
40+
///
41+
/// # Floating-point vectors
1942
///
20-
/// FIXME: document guarantees with respect to:
21-
/// * integers: overflow behavior
22-
/// * floats: order and NaNs
43+
/// If one of the vector elements is `NaN` the reduction returns
44+
/// `NaN`.
2345
#[cfg(target_arch = "aarch64")]
2446
#[inline]
25-
pub fn sum(self) -> $elem_ty {
47+
pub fn wrapping_sum(self) -> $elem_ty {
2648
// FIXME: broken on AArch64
2749
// https://bugs.llvm.org/show_bug.cgi?id=36796
2850
let mut x = self.extract(0) as $elem_ty;
2951
for i in 1..$id::lanes() {
30-
x += self.extract(i) as $elem_ty;
52+
x = x.wrapping_add(self.extract(i) as $elem_ty);
3153
}
3254
x
3355
}
3456

35-
/// Lane-wise multiplication of the vector elements.
57+
/// Horizontal product of the vector elements.
3658
///
37-
/// FIXME: document guarantees with respect to:
38-
/// * integers: overflow behavior
39-
/// * floats: order and NaNs
59+
/// The intrinsic performs a tree-reduction of the vector elements.
60+
/// That is, for an 8 element vector:
61+
///
62+
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
63+
///
64+
/// # Integer vectors
65+
///
66+
/// If an operation overflows it returns the mathematical result
67+
/// modulo `2^n` where `n` is the number of bits of the element type.
68+
///
69+
/// # Floating-point vectors
70+
///
71+
/// If one of the vector elements is `NaN` the reduction returns
72+
/// `NaN`.
4073
#[cfg(not(target_arch = "aarch64"))]
4174
#[inline]
42-
pub fn product(self) -> $elem_ty {
75+
pub fn wrapping_product(self) -> $elem_ty {
4376
use coresimd::simd_llvm::simd_reduce_mul_ordered;
4477
unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
4578
}
46-
/// Lane-wise multiplication of the vector elements.
79+
/// Horizontal product of the vector elements.
80+
///
81+
/// The intrinsic performs a tree-reduction of the vector elements.
82+
/// That is, for an 8 element vector:
83+
///
84+
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
85+
///
86+
/// # Integer vectors
87+
///
88+
/// If an operation overflows it returns the mathematical result
89+
/// modulo `2^n` where `n` is the number of bits of the element type.
90+
///
91+
/// # Floating-point vectors
4792
///
48-
/// FIXME: document guarantees with respect to:
49-
/// * integers: overflow behavior
50-
/// * floats: order and NaNs
93+
/// If one of the vector elements is `NaN` the reduction returns
94+
/// `NaN`.
5195
#[cfg(target_arch = "aarch64")]
5296
#[inline]
53-
pub fn product(self) -> $elem_ty {
97+
pub fn wrapping_product(self) -> $elem_ty {
5498
// FIXME: broken on AArch64
5599
// https://bugs.llvm.org/show_bug.cgi?id=36796
56100
let mut x = self.extract(0) as $elem_ty;
57101
for i in 1..$id::lanes() {
58-
x *= self.extract(i) as $elem_ty;
102+
x = x.wrapping_mul(self.extract(i) as $elem_ty);
59103
}
60104
x
61105
}
@@ -78,25 +122,25 @@ macro_rules! test_arithmetic_reductions {
78122
}
79123

80124
#[test]
81-
fn sum() {
125+
fn wrapping_sum() {
82126
use coresimd::simd::$id;
83127
let v = $id::splat(0 as $elem_ty);
84-
assert_eq!(v.sum(), 0 as $elem_ty);
128+
assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
85129
let v = $id::splat(1 as $elem_ty);
86-
assert_eq!(v.sum(), $id::lanes() as $elem_ty);
130+
assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
87131
let v = alternating(2);
88132
assert_eq!(
89-
v.sum(),
133+
v.wrapping_sum(),
90134
($id::lanes() / 2 + $id::lanes()) as $elem_ty
91135
);
92136
}
93137
#[test]
94-
fn product() {
138+
fn wrapping_product() {
95139
use coresimd::simd::$id;
96140
let v = $id::splat(0 as $elem_ty);
97-
assert_eq!(v.product(), 0 as $elem_ty);
141+
assert_eq!(v.wrapping_product(), 0 as $elem_ty);
98142
let v = $id::splat(1 as $elem_ty);
99-
assert_eq!(v.product(), 1 as $elem_ty);
143+
assert_eq!(v.wrapping_product(), 1 as $elem_ty);
100144
let f = match $id::lanes() {
101145
64 => 16,
102146
32 => 8,
@@ -105,7 +149,7 @@ macro_rules! test_arithmetic_reductions {
105149
};
106150
let v = alternating(f);
107151
assert_eq!(
108-
v.product(),
152+
v.wrapping_product(),
109153
(2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
110154
);
111155
}

coresimd/ppsv/api/minmax_reductions.rs

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,19 @@
44
macro_rules! impl_minmax_reductions {
55
($id:ident, $elem_ty:ident) => {
66
impl $id {
7-
/// Largest vector value.
8-
///
9-
/// FIXME: document behavior for float vectors with NaNs.
7+
/// Largest vector element value.
108
#[cfg(not(target_arch = "aarch64"))]
119
#[inline]
12-
pub fn max(self) -> $elem_ty {
10+
pub fn max_element(self) -> $elem_ty {
1311
use coresimd::simd_llvm::simd_reduce_max;
1412
unsafe { simd_reduce_max(self) }
1513
}
16-
/// Largest vector value.
17-
///
18-
/// FIXME: document behavior for float vectors with NaNs.
14+
15+
/// Largest vector element value.
1916
#[cfg(target_arch = "aarch64")]
2017
#[allow(unused_imports)]
2118
#[inline]
22-
pub fn max(self) -> $elem_ty {
19+
pub fn max_element(self) -> $elem_ty {
2320
// FIXME: broken on AArch64
2421
// https://bugs.llvm.org/show_bug.cgi?id=36796
2522
use cmp::Ord;
@@ -31,22 +28,19 @@ macro_rules! impl_minmax_reductions {
3128
x
3229
}
3330

34-
/// Smallest vector value.
35-
///
36-
/// FIXME: document behavior for float vectors with NaNs.
31+
/// Smallest vector element value.
3732
#[cfg(not(target_arch = "aarch64"))]
3833
#[inline]
39-
pub fn min(self) -> $elem_ty {
34+
pub fn min_element(self) -> $elem_ty {
4035
use coresimd::simd_llvm::simd_reduce_min;
4136
unsafe { simd_reduce_min(self) }
4237
}
43-
/// Smallest vector value.
44-
///
45-
/// FIXME: document behavior for float vectors with NaNs.
38+
39+
/// Smallest vector element value.
4640
#[cfg(target_arch = "aarch64")]
4741
#[allow(unused_imports)]
4842
#[inline]
49-
pub fn min(self) -> $elem_ty {
43+
pub fn min_element(self) -> $elem_ty {
5044
// FIXME: broken on AArch64
5145
// https://bugs.llvm.org/show_bug.cgi?id=36796
5246
use cmp::Ord;
@@ -65,29 +59,29 @@ macro_rules! impl_minmax_reductions {
6559
macro_rules! test_minmax_reductions {
6660
($id:ident, $elem_ty:ident) => {
6761
#[test]
68-
fn max() {
62+
fn max_element() {
6963
use coresimd::simd::$id;
7064
let v = $id::splat(0 as $elem_ty);
71-
assert_eq!(v.max(), 0 as $elem_ty);
65+
assert_eq!(v.max_element(), 0 as $elem_ty);
7266
let v = v.replace(1, 1 as $elem_ty);
73-
assert_eq!(v.max(), 1 as $elem_ty);
67+
assert_eq!(v.max_element(), 1 as $elem_ty);
7468
let v = v.replace(0, 2 as $elem_ty);
75-
assert_eq!(v.max(), 2 as $elem_ty);
69+
assert_eq!(v.max_element(), 2 as $elem_ty);
7670
}
7771

7872
#[test]
79-
fn min() {
73+
fn min_element() {
8074
use coresimd::simd::$id;
8175
let v = $id::splat(0 as $elem_ty);
82-
assert_eq!(v.min(), 0 as $elem_ty);
76+
assert_eq!(v.min_element(), 0 as $elem_ty);
8377
let v = v.replace(1, 1 as $elem_ty);
84-
assert_eq!(v.min(), 0 as $elem_ty);
78+
assert_eq!(v.min_element(), 0 as $elem_ty);
8579
let v = $id::splat(1 as $elem_ty);
8680
let v = v.replace(0, 2 as $elem_ty);
87-
assert_eq!(v.min(), 1 as $elem_ty);
81+
assert_eq!(v.min_element(), 1 as $elem_ty);
8882
let v = $id::splat(2 as $elem_ty);
8983
let v = v.replace(1, 1 as $elem_ty);
90-
assert_eq!(v.min(), 1 as $elem_ty);
84+
assert_eq!(v.min_element(), 1 as $elem_ty);
9185
}
9286
};
9387
}

0 commit comments

Comments
 (0)