documents arithmetic reduction semantics

gnzlbg · gnzlbg · commit 6a4ec7595544 · 2018-04-03T20:05:11.000+02:00
diff --git a/ci/run.sh b/ci/run.sh
@@ -7,7 +7,7 @@ set -ex
 # Tests are all super fast anyway, and they fault often enough on travis that
 # having only one thread increases debuggability to be worth it.
 export RUST_TEST_THREADS=1
-#export RUST_BACKTRACE=1
+export RUST_BACKTRACE=1
 #export RUST_TEST_NOCAPTURE=1
 
 FEATURES="strict,$FEATURES"
diff --git a/coresimd/ppsv/api/arithmetic_reductions.rs b/coresimd/ppsv/api/arithmetic_reductions.rs
@@ -4,58 +4,102 @@
 macro_rules! impl_arithmetic_reductions {
     ($id:ident, $elem_ty:ident) => {
         impl $id {
-            /// Lane-wise addition of the vector elements.
+            /// Horizontal sum of the vector elements.
             ///
-            /// FIXME: document guarantees with respect to:
-            ///    * integers: overflow behavior
-            ///    * floats: order and NaNs
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            ///
+            /// # Floating-point vectors
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
-            pub fn sum(self) -> $elem_ty {
+            pub fn wrapping_sum(self) -> $elem_ty {
                 use coresimd::simd_llvm::simd_reduce_add_ordered;
                 unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
             }
-            /// Lane-wise addition of the vector elements.
+            /// Horizontal sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            ///
+            /// # Floating-point vectors
             ///
-            /// FIXME: document guarantees with respect to:
-            ///    * integers: overflow behavior
-            ///    * floats: order and NaNs
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`.
             #[cfg(target_arch = "aarch64")]
             #[inline]
-            pub fn sum(self) -> $elem_ty {
+            pub fn wrapping_sum(self) -> $elem_ty {
                 // FIXME: broken on AArch64
                 // https://bugs.llvm.org/show_bug.cgi?id=36796
                 let mut x = self.extract(0) as $elem_ty;
                 for i in 1..$id::lanes() {
-                    x += self.extract(i) as $elem_ty;
+                    x = x.wrapping_add(self.extract(i) as $elem_ty);
                 }
                 x
             }
 
-            /// Lane-wise multiplication of the vector elements.
+            /// Horizontal product of the vector elements.
             ///
-            /// FIXME: document guarantees with respect to:
-            ///    * integers: overflow behavior
-            ///    * floats: order and NaNs
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            ///
+            /// # Floating-point vectors
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
-            pub fn product(self) -> $elem_ty {
+            pub fn wrapping_product(self) -> $elem_ty {
                 use coresimd::simd_llvm::simd_reduce_mul_ordered;
                 unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
             }
-            /// Lane-wise multiplication of the vector elements.
+            /// Horizontal product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            ///
+            /// # Floating-point vectors
             ///
-            /// FIXME: document guarantees with respect to:
-            ///    * integers: overflow behavior
-            ///    * floats: order and NaNs
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`.
             #[cfg(target_arch = "aarch64")]
             #[inline]
-            pub fn product(self) -> $elem_ty {
+            pub fn wrapping_product(self) -> $elem_ty {
                 // FIXME: broken on AArch64
                 // https://bugs.llvm.org/show_bug.cgi?id=36796
                 let mut x = self.extract(0) as $elem_ty;
                 for i in 1..$id::lanes() {
-                    x *= self.extract(i) as $elem_ty;
+                    x = x.wrapping_mul(self.extract(i) as $elem_ty);
                 }
                 x
             }
@@ -78,25 +122,25 @@ macro_rules! test_arithmetic_reductions {
         }
 
         #[test]
-        fn sum() {
+        fn wrapping_sum() {
             use coresimd::simd::$id;
             let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.sum(), 0 as $elem_ty);
+            assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
             let v = $id::splat(1 as $elem_ty);
-            assert_eq!(v.sum(), $id::lanes() as $elem_ty);
+            assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
             let v = alternating(2);
             assert_eq!(
-                v.sum(),
+                v.wrapping_sum(),
                 ($id::lanes() / 2 + $id::lanes()) as $elem_ty
             );
         }
         #[test]
-        fn product() {
+        fn wrapping_product() {
             use coresimd::simd::$id;
             let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.product(), 0 as $elem_ty);
+            assert_eq!(v.wrapping_product(), 0 as $elem_ty);
             let v = $id::splat(1 as $elem_ty);
-            assert_eq!(v.product(), 1 as $elem_ty);
+            assert_eq!(v.wrapping_product(), 1 as $elem_ty);
             let f = match $id::lanes() {
                 64 => 16,
                 32 => 8,
@@ -105,7 +149,7 @@ macro_rules! test_arithmetic_reductions {
             };
             let v = alternating(f);
             assert_eq!(
-                v.product(),
+                v.wrapping_product(),
                 (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
             );
         }
diff --git a/coresimd/ppsv/api/minmax_reductions.rs b/coresimd/ppsv/api/minmax_reductions.rs
@@ -4,22 +4,19 @@
 macro_rules! impl_minmax_reductions {
     ($id:ident, $elem_ty:ident) => {
         impl $id {
-            /// Largest vector value.
-            ///
-            /// FIXME: document behavior for float vectors with NaNs.
+            /// Largest vector element value.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
-            pub fn max(self) -> $elem_ty {
+            pub fn max_element(self) -> $elem_ty {
                 use coresimd::simd_llvm::simd_reduce_max;
                 unsafe { simd_reduce_max(self) }
             }
-            /// Largest vector value.
-            ///
-            /// FIXME: document behavior for float vectors with NaNs.
+
+            /// Largest vector element value.
             #[cfg(target_arch = "aarch64")]
             #[allow(unused_imports)]
             #[inline]
-            pub fn max(self) -> $elem_ty {
+            pub fn max_element(self) -> $elem_ty {
                 // FIXME: broken on AArch64
                 // https://bugs.llvm.org/show_bug.cgi?id=36796
                 use cmp::Ord;
@@ -31,22 +28,19 @@ macro_rules! impl_minmax_reductions {
                 x
             }
 
-            /// Smallest vector value.
-            ///
-            /// FIXME: document behavior for float vectors with NaNs.
+            /// Smallest vector element value.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
-            pub fn min(self) -> $elem_ty {
+            pub fn min_element(self) -> $elem_ty {
                 use coresimd::simd_llvm::simd_reduce_min;
                 unsafe { simd_reduce_min(self) }
             }
-            /// Smallest vector value.
-            ///
-            /// FIXME: document behavior for float vectors with NaNs.
+
+            /// Smallest vector element value.
             #[cfg(target_arch = "aarch64")]
             #[allow(unused_imports)]
             #[inline]
-            pub fn min(self) -> $elem_ty {
+            pub fn min_element(self) -> $elem_ty {
                 // FIXME: broken on AArch64
                 // https://bugs.llvm.org/show_bug.cgi?id=36796
                 use cmp::Ord;
@@ -65,29 +59,29 @@ macro_rules! impl_minmax_reductions {
 macro_rules! test_minmax_reductions {
     ($id:ident, $elem_ty:ident) => {
         #[test]
-        fn max() {
+        fn max_element() {
             use coresimd::simd::$id;
             let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.max(), 0 as $elem_ty);
+            assert_eq!(v.max_element(), 0 as $elem_ty);
             let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.max(), 1 as $elem_ty);
+            assert_eq!(v.max_element(), 1 as $elem_ty);
             let v = v.replace(0, 2 as $elem_ty);
-            assert_eq!(v.max(), 2 as $elem_ty);
+            assert_eq!(v.max_element(), 2 as $elem_ty);
         }
 
         #[test]
-        fn min() {
+        fn min_element() {
             use coresimd::simd::$id;
             let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.min(), 0 as $elem_ty);
+            assert_eq!(v.min_element(), 0 as $elem_ty);
             let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.min(), 0 as $elem_ty);
+            assert_eq!(v.min_element(), 0 as $elem_ty);
             let v = $id::splat(1 as $elem_ty);
             let v = v.replace(0, 2 as $elem_ty);
-            assert_eq!(v.min(), 1 as $elem_ty);
+            assert_eq!(v.min_element(), 1 as $elem_ty);
             let v = $id::splat(2 as $elem_ty);
             let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.min(), 1 as $elem_ty);
+            assert_eq!(v.min_element(), 1 as $elem_ty);
         }
     };
 }
diff --git a/crates/coresimd/tests/reductions.rs b/crates/coresimd/tests/reductions.rs