Documents arithmetic reduction semantics (#412)

gnzlbg · web-flow · commit 65740ab9b9c4 · 2018-04-05T19:36:04.000+02:00
* documents arithmetic reduction semantics
diff --git a/ci/run.sh b/ci/run.sh
@@ -7,7 +7,7 @@ set -ex
 # Tests are all super fast anyway, and they fault often enough on travis that
 # having only one thread increases debuggability to be worth it.
 export RUST_TEST_THREADS=1
-#export RUST_BACKTRACE=1
+#export RUST_BACKTRACE=full
 #export RUST_TEST_NOCAPTURE=1
 
 FEATURES="strict,$FEATURES"
diff --git a/coresimd/ppsv/api/arithmetic_reductions.rs b/coresimd/ppsv/api/arithmetic_reductions.rs
@@ -4,58 +4,104 @@
 macro_rules! impl_arithmetic_reductions {
     ($id:ident, $elem_ty:ident) => {
         impl $id {
-            /// Lane-wise addition of the vector elements.
+            /// Horizontal sum of the vector elements.
             ///
-            /// FIXME: document guarantees with respect to:
-            ///    * integers: overflow behavior
-            ///    * floats: order and NaNs
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            ///
+            /// # Floating-point vectors
+            ///
+            /// If any of the vector elements is `NaN` the reduction returns
+            /// `NaN`.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
-            pub fn sum(self) -> $elem_ty {
+            pub fn wrapping_sum(self) -> $elem_ty {
                 use coresimd::simd_llvm::simd_reduce_add_ordered;
                 unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
             }
-            /// Lane-wise addition of the vector elements.
+            /// Horizontal sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            ///
+            /// # Floating-point vectors
             ///
-            /// FIXME: document guarantees with respect to:
-            ///    * integers: overflow behavior
-            ///    * floats: order and NaNs
+            /// If any of the vector elements is `NaN` the reduction returns
+            /// `NaN`.
             #[cfg(target_arch = "aarch64")]
             #[inline]
-            pub fn sum(self) -> $elem_ty {
+            pub fn wrapping_sum(self) -> $elem_ty {
                 // FIXME: broken on AArch64
                 // https://bugs.llvm.org/show_bug.cgi?id=36796
+                use super::codegen::wrapping::Wrapping;
                 let mut x = self.extract(0) as $elem_ty;
                 for i in 1..$id::lanes() {
-                    x += self.extract(i) as $elem_ty;
+                    x = Wrapping::add(x, self.extract(i) as $elem_ty);
                 }
                 x
             }
 
-            /// Lane-wise multiplication of the vector elements.
+            /// Horizontal product of the vector elements.
             ///
-            /// FIXME: document guarantees with respect to:
-            ///    * integers: overflow behavior
-            ///    * floats: order and NaNs
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            ///
+            /// # Floating-point vectors
+            ///
+            /// If any of the vector elements is `NaN` the reduction returns
+            /// `NaN`.
             #[cfg(not(target_arch = "aarch64"))]
             #[inline]
-            pub fn product(self) -> $elem_ty {
+            pub fn wrapping_product(self) -> $elem_ty {
                 use coresimd::simd_llvm::simd_reduce_mul_ordered;
                 unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
             }
-            /// Lane-wise multiplication of the vector elements.
+            /// Horizontal product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// # Integer vectors
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            ///
+            /// # Floating-point vectors
             ///
-            /// FIXME: document guarantees with respect to:
-            ///    * integers: overflow behavior
-            ///    * floats: order and NaNs
+            /// If any of the vector elements is `NaN` the reduction returns
+            /// `NaN`.
             #[cfg(target_arch = "aarch64")]
             #[inline]
-            pub fn product(self) -> $elem_ty {
+            pub fn wrapping_product(self) -> $elem_ty {
                 // FIXME: broken on AArch64
                 // https://bugs.llvm.org/show_bug.cgi?id=36796
+                use super::codegen::wrapping::Wrapping;
                 let mut x = self.extract(0) as $elem_ty;
                 for i in 1..$id::lanes() {
-                    x *= self.extract(i) as $elem_ty;
+                    x = Wrapping::mul(x, self.extract(i) as $elem_ty);
                 }
                 x
             }
@@ -78,25 +124,25 @@ macro_rules! test_arithmetic_reductions {
         }
 
         #[test]
-        fn sum() {
+        fn wrapping_sum() {
             use coresimd::simd::$id;
             let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.sum(), 0 as $elem_ty);
+            assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
             let v = $id::splat(1 as $elem_ty);
-            assert_eq!(v.sum(), $id::lanes() as $elem_ty);
+            assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
             let v = alternating(2);
             assert_eq!(
-                v.sum(),
+                v.wrapping_sum(),
                 ($id::lanes() / 2 + $id::lanes()) as $elem_ty
             );
         }
         #[test]
-        fn product() {
+        fn wrapping_product() {
             use coresimd::simd::$id;
             let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.product(), 0 as $elem_ty);
+            assert_eq!(v.wrapping_product(), 0 as $elem_ty);
             let v = $id::splat(1 as $elem_ty);
-            assert_eq!(v.product(), 1 as $elem_ty);
+            assert_eq!(v.wrapping_product(), 1 as $elem_ty);
             let f = match $id::lanes() {
                 64 => 16,
                 32 => 8,
@@ -105,7 +151,7 @@ macro_rules! test_arithmetic_reductions {
             };
             let v = alternating(f);
             assert_eq!(
-                v.product(),
+                v.wrapping_product(),
                 (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
             );
         }
diff --git a/coresimd/ppsv/api/minmax_reductions.rs b/coresimd/ppsv/api/minmax_reductions.rs
@@ -4,22 +4,19 @@
 macro_rules! impl_minmax_reductions {
     ($id:ident, $elem_ty:ident) => {
         impl $id {
-            /// Largest vector value.
-            ///
-            /// FIXME: document behavior for float vectors with NaNs.
-            #[cfg(not(target_arch = "aarch64"))]
+            /// Largest vector element value.
+            #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
             #[inline]
-            pub fn max(self) -> $elem_ty {
+            pub fn max_element(self) -> $elem_ty {
                 use coresimd::simd_llvm::simd_reduce_max;
                 unsafe { simd_reduce_max(self) }
             }
-            /// Largest vector value.
-            ///
-            /// FIXME: document behavior for float vectors with NaNs.
-            #[cfg(target_arch = "aarch64")]
+
+            /// Largest vector element value.
+            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
             #[allow(unused_imports)]
             #[inline]
-            pub fn max(self) -> $elem_ty {
+            pub fn max_element(self) -> $elem_ty {
                 // FIXME: broken on AArch64
                 // https://bugs.llvm.org/show_bug.cgi?id=36796
                 use cmp::Ord;
@@ -31,22 +28,19 @@ macro_rules! impl_minmax_reductions {
                 x
             }
 
-            /// Smallest vector value.
-            ///
-            /// FIXME: document behavior for float vectors with NaNs.
-            #[cfg(not(target_arch = "aarch64"))]
+            /// Smallest vector element value.
+            #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
             #[inline]
-            pub fn min(self) -> $elem_ty {
+            pub fn min_element(self) -> $elem_ty {
                 use coresimd::simd_llvm::simd_reduce_min;
                 unsafe { simd_reduce_min(self) }
             }
-            /// Smallest vector value.
-            ///
-            /// FIXME: document behavior for float vectors with NaNs.
-            #[cfg(target_arch = "aarch64")]
+
+            /// Smallest vector element value.
+            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
             #[allow(unused_imports)]
             #[inline]
-            pub fn min(self) -> $elem_ty {
+            pub fn min_element(self) -> $elem_ty {
                 // FIXME: broken on AArch64
                 // https://bugs.llvm.org/show_bug.cgi?id=36796
                 use cmp::Ord;
@@ -65,29 +59,29 @@ macro_rules! impl_minmax_reductions {
 macro_rules! test_minmax_reductions {
     ($id:ident, $elem_ty:ident) => {
         #[test]
-        fn max() {
+        fn max_element() {
             use coresimd::simd::$id;
             let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.max(), 0 as $elem_ty);
+            assert_eq!(v.max_element(), 0 as $elem_ty);
             let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.max(), 1 as $elem_ty);
+            assert_eq!(v.max_element(), 1 as $elem_ty);
             let v = v.replace(0, 2 as $elem_ty);
-            assert_eq!(v.max(), 2 as $elem_ty);
+            assert_eq!(v.max_element(), 2 as $elem_ty);
         }
 
         #[test]
-        fn min() {
+        fn min_element() {
             use coresimd::simd::$id;
             let v = $id::splat(0 as $elem_ty);
-            assert_eq!(v.min(), 0 as $elem_ty);
+            assert_eq!(v.min_element(), 0 as $elem_ty);
             let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.min(), 0 as $elem_ty);
+            assert_eq!(v.min_element(), 0 as $elem_ty);
             let v = $id::splat(1 as $elem_ty);
             let v = v.replace(0, 2 as $elem_ty);
-            assert_eq!(v.min(), 1 as $elem_ty);
+            assert_eq!(v.min_element(), 1 as $elem_ty);
             let v = $id::splat(2 as $elem_ty);
             let v = v.replace(1, 1 as $elem_ty);
-            assert_eq!(v.min(), 1 as $elem_ty);
+            assert_eq!(v.min_element(), 1 as $elem_ty);
         }
     };
 }
diff --git a/coresimd/ppsv/mod.rs b/coresimd/ppsv/mod.rs
@@ -78,3 +78,52 @@ impl<T> FromBits<T> for T {
         t
     }
 }
+
+/// Works around code generation issues.
+#[cfg(target_arch = "aarch64")]
+mod codegen {
+    #[cfg(target_arch = "aarch64")]
+    pub mod wrapping {
+        pub trait Wrapping {
+            fn add(self, other: Self) -> Self;
+            fn mul(self, other: Self) -> Self;
+        }
+
+        macro_rules! int_impl {
+            ($id:ident) => {
+                impl Wrapping for $id {
+                    fn add(self, other: Self) -> Self {
+                        self.wrapping_add(other)
+                    }
+                    fn mul(self, other: Self) -> Self {
+                        self.wrapping_mul(other)
+                    }
+                }
+            };
+        }
+        int_impl!(i8);
+        int_impl!(i16);
+        int_impl!(i32);
+        int_impl!(i64);
+        int_impl!(u8);
+        int_impl!(u16);
+        int_impl!(u32);
+        int_impl!(u64);
+
+        macro_rules! float_impl {
+            ($id:ident) => {
+                impl Wrapping for $id {
+                    fn add(self, other: Self) -> Self {
+                        self + other
+                    }
+                    fn mul(self, other: Self) -> Self {
+                        self * other
+                    }
+                }
+            };
+        }
+        float_impl!(f32);
+        float_impl!(f64);
+    }
+
+}
diff --git a/crates/coresimd/tests/reductions.rs b/crates/coresimd/tests/reductions.rs