4
4
macro_rules! impl_arithmetic_reductions {
5
5
( $id: ident, $elem_ty: ident) => {
6
6
impl $id {
7
- /// Lane-wise addition of the vector elements.
7
+ /// Horizontal sum of the vector elements.
8
8
///
9
- /// FIXME: document guarantees with respect to:
10
- /// * integers: overflow behavior
11
- /// * floats: order and NaNs
9
+ /// The intrinsic performs a tree-reduction of the vector elements.
10
+ /// That is, for an 8 element vector:
11
+ ///
12
+ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
13
+ ///
14
+ /// # Integer vectors
15
+ ///
16
+ /// If an operation overflows it returns the mathematical result
17
+ /// modulo `2^n` where `n` is the number of bits of the element type.
18
+ ///
19
+ /// # Floating-point vectors
20
+ ///
21
+ /// If one of the vector elements is `NaN` the reduction returns
22
+ /// `NaN`.
12
23
#[ cfg( not( target_arch = "aarch64" ) ) ]
13
24
#[ inline]
14
- pub fn sum ( self ) -> $elem_ty {
25
+ pub fn wrapping_sum ( self ) -> $elem_ty {
15
26
use coresimd:: simd_llvm:: simd_reduce_add_ordered;
16
27
unsafe { simd_reduce_add_ordered( self , 0 as $elem_ty) }
17
28
}
18
- /// Lane-wise addition of the vector elements.
29
+ /// Horizontal sum of the vector elements.
30
+ ///
31
+ /// The intrinsic performs a tree-reduction of the vector elements.
32
+ /// That is, for an 8 element vector:
33
+ ///
34
+ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
35
+ ///
36
+ /// # Integer vectors
37
+ ///
38
+ /// If an operation overflows it returns the mathematical result
39
+ /// modulo `2^n` where `n` is the number of bits of the element type.
40
+ ///
41
+ /// # Floating-point vectors
19
42
///
20
- /// FIXME: document guarantees with respect to:
21
- /// * integers: overflow behavior
22
- /// * floats: order and NaNs
43
+ /// If one of the vector elements is `NaN` the reduction returns
44
+ /// `NaN`.
23
45
#[ cfg( target_arch = "aarch64" ) ]
24
46
#[ inline]
25
- pub fn sum ( self ) -> $elem_ty {
47
+ pub fn wrapping_sum ( self ) -> $elem_ty {
26
48
// FIXME: broken on AArch64
27
49
// https://bugs.llvm.org/show_bug.cgi?id=36796
28
50
let mut x = self . extract( 0 ) as $elem_ty;
@@ -32,25 +54,47 @@ macro_rules! impl_arithmetic_reductions {
32
54
x
33
55
}
34
56
35
- /// Lane-wise multiplication of the vector elements.
57
+ /// Horizontal product of the vector elements.
36
58
///
37
- /// FIXME: document guarantees with respect to:
38
- /// * integers: overflow behavior
39
- /// * floats: order and NaNs
59
+ /// The intrinsic performs a tree-reduction of the vector elements.
60
+ /// That is, for an 8 element vector:
61
+ ///
62
+ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
63
+ ///
64
+ /// # Integer vectors
65
+ ///
66
+ /// If an operation overflows it returns the mathematical result
67
+ /// modulo `2^n` where `n` is the number of bits of the element type.
68
+ ///
69
+ /// # Floating-point vectors
70
+ ///
71
+ /// If one of the vector elements is `NaN` the reduction returns
72
+ /// `NaN`.
40
73
#[ cfg( not( target_arch = "aarch64" ) ) ]
41
74
#[ inline]
42
- pub fn product ( self ) -> $elem_ty {
75
+ pub fn wrapping_product ( self ) -> $elem_ty {
43
76
use coresimd:: simd_llvm:: simd_reduce_mul_ordered;
44
77
unsafe { simd_reduce_mul_ordered( self , 1 as $elem_ty) }
45
78
}
46
- /// Lane-wise multiplication of the vector elements.
79
+ /// Horizontal product of the vector elements.
80
+ ///
81
+ /// The intrinsic performs a tree-reduction of the vector elements.
82
+ /// That is, for an 8 element vector:
83
+ ///
84
+ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
85
+ ///
86
+ /// # Integer vectors
87
+ ///
88
+ /// If an operation overflows it returns the mathematical result
89
+ /// modulo `2^n` where `n` is the number of bits of the element type.
90
+ ///
91
+ /// # Floating-point vectors
47
92
///
48
- /// FIXME: document guarantees with respect to:
49
- /// * integers: overflow behavior
50
- /// * floats: order and NaNs
93
+ /// If one of the vector elements is `NaN` the reduction returns
94
+ /// `NaN`.
51
95
#[ cfg( target_arch = "aarch64" ) ]
52
96
#[ inline]
53
- pub fn product ( self ) -> $elem_ty {
97
+ pub fn wrapping_product ( self ) -> $elem_ty {
54
98
// FIXME: broken on AArch64
55
99
// https://bugs.llvm.org/show_bug.cgi?id=36796
56
100
let mut x = self . extract( 0 ) as $elem_ty;
@@ -78,25 +122,25 @@ macro_rules! test_arithmetic_reductions {
78
122
}
79
123
80
124
#[ test]
81
- fn sum ( ) {
125
+ fn wrapping_sum ( ) {
82
126
use coresimd:: simd:: $id;
83
127
let v = $id:: splat( 0 as $elem_ty) ;
84
- assert_eq!( v. sum ( ) , 0 as $elem_ty) ;
128
+ assert_eq!( v. wrapping_sum ( ) , 0 as $elem_ty) ;
85
129
let v = $id:: splat( 1 as $elem_ty) ;
86
- assert_eq!( v. sum ( ) , $id:: lanes( ) as $elem_ty) ;
130
+ assert_eq!( v. wrapping_sum ( ) , $id:: lanes( ) as $elem_ty) ;
87
131
let v = alternating( 2 ) ;
88
132
assert_eq!(
89
- v. sum ( ) ,
133
+ v. wrapping_sum ( ) ,
90
134
( $id:: lanes( ) / 2 + $id:: lanes( ) ) as $elem_ty
91
135
) ;
92
136
}
93
137
#[ test]
94
- fn product ( ) {
138
+ fn wrapping_product ( ) {
95
139
use coresimd:: simd:: $id;
96
140
let v = $id:: splat( 0 as $elem_ty) ;
97
- assert_eq!( v. product ( ) , 0 as $elem_ty) ;
141
+ assert_eq!( v. wrapping_product ( ) , 0 as $elem_ty) ;
98
142
let v = $id:: splat( 1 as $elem_ty) ;
99
- assert_eq!( v. product ( ) , 1 as $elem_ty) ;
143
+ assert_eq!( v. wrapping_product ( ) , 1 as $elem_ty) ;
100
144
let f = match $id:: lanes( ) {
101
145
64 => 16 ,
102
146
32 => 8 ,
@@ -105,7 +149,7 @@ macro_rules! test_arithmetic_reductions {
105
149
} ;
106
150
let v = alternating( f) ;
107
151
assert_eq!(
108
- v. product ( ) ,
152
+ v. wrapping_product ( ) ,
109
153
( 2_usize . pow( ( $id:: lanes( ) / f) as u32 ) as $elem_ty)
110
154
) ;
111
155
}
0 commit comments