Skip to content

Documents arithmetic reduction semantics #412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set -ex
# Tests are all super fast anyway, and they fault often enough on travis that
# having only one thread increases debuggability enough to be worth it.
export RUST_TEST_THREADS=1
#export RUST_BACKTRACE=1
#export RUST_BACKTRACE=full
#export RUST_TEST_NOCAPTURE=1

FEATURES="strict,$FEATURES"
Expand Down
106 changes: 76 additions & 30 deletions coresimd/ppsv/api/arithmetic_reductions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,58 +4,104 @@
macro_rules! impl_arithmetic_reductions {
($id:ident, $elem_ty:ident) => {
impl $id {
/// Lane-wise addition of the vector elements.
/// Horizontal sum of the vector elements.
///
/// FIXME: document guarantees with respect to:
/// * integers: overflow behavior
/// * floats: order and NaNs
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of bits of the element type.
///
/// # Floating-point vectors
///
/// If one of the vector elements is `NaN` the reduction returns
/// `NaN`.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn sum(self) -> $elem_ty {
pub fn wrapping_sum(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_add_ordered;
unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
}
/// Lane-wise addition of the vector elements.
/// Horizontal sum of the vector elements.
///
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of bits of the element type.
///
/// # Floating-point vectors
///
/// FIXME: document guarantees with respect to:
/// * integers: overflow behavior
/// * floats: order and NaNs
/// If one of the vector elements is `NaN` the reduction returns
/// `NaN`.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn sum(self) -> $elem_ty {
pub fn wrapping_sum(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use super::codegen::wrapping::Wrapping;
let mut x = self.extract(0) as $elem_ty;
for i in 1..$id::lanes() {
x += self.extract(i) as $elem_ty;
x = Wrapping::add(x, self.extract(i) as $elem_ty);
}
x
}

/// Lane-wise multiplication of the vector elements.
/// Horizontal product of the vector elements.
///
/// FIXME: document guarantees with respect to:
/// * integers: overflow behavior
/// * floats: order and NaNs
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of bits of the element type.
///
/// # Floating-point vectors
///
/// If one of the vector elements is `NaN` the reduction returns
/// `NaN`.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn product(self) -> $elem_ty {
pub fn wrapping_product(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_mul_ordered;
unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
}
/// Lane-wise multiplication of the vector elements.
/// Horizontal product of the vector elements.
///
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of bits of the element type.
///
/// # Floating-point vectors
///
/// FIXME: document guarantees with respect to:
/// * integers: overflow behavior
/// * floats: order and NaNs
/// If one of the vector elements is `NaN` the reduction returns
/// `NaN`.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn product(self) -> $elem_ty {
pub fn wrapping_product(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use super::codegen::wrapping::Wrapping;
let mut x = self.extract(0) as $elem_ty;
for i in 1..$id::lanes() {
x *= self.extract(i) as $elem_ty;
x = Wrapping::mul(x, self.extract(i) as $elem_ty);
}
x
}
Expand All @@ -78,25 +124,25 @@ macro_rules! test_arithmetic_reductions {
}

#[test]
fn sum() {
fn wrapping_sum() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.sum(), 0 as $elem_ty);
assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
let v = $id::splat(1 as $elem_ty);
assert_eq!(v.sum(), $id::lanes() as $elem_ty);
assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
let v = alternating(2);
assert_eq!(
v.sum(),
v.wrapping_sum(),
($id::lanes() / 2 + $id::lanes()) as $elem_ty
);
}
#[test]
fn product() {
fn wrapping_product() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.product(), 0 as $elem_ty);
assert_eq!(v.wrapping_product(), 0 as $elem_ty);
let v = $id::splat(1 as $elem_ty);
assert_eq!(v.product(), 1 as $elem_ty);
assert_eq!(v.wrapping_product(), 1 as $elem_ty);
let f = match $id::lanes() {
64 => 16,
32 => 8,
Expand All @@ -105,7 +151,7 @@ macro_rules! test_arithmetic_reductions {
};
let v = alternating(f);
assert_eq!(
v.product(),
v.wrapping_product(),
(2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
);
}
Expand Down
52 changes: 23 additions & 29 deletions coresimd/ppsv/api/minmax_reductions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,19 @@
macro_rules! impl_minmax_reductions {
($id:ident, $elem_ty:ident) => {
impl $id {
/// Largest vector value.
///
/// FIXME: document behavior for float vectors with NaNs.
#[cfg(not(target_arch = "aarch64"))]
/// Largest vector element value.
#[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
#[inline]
pub fn max(self) -> $elem_ty {
pub fn max_element(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_max;
unsafe { simd_reduce_max(self) }
}
/// Largest vector value.
///
/// FIXME: document behavior for float vectors with NaNs.
#[cfg(target_arch = "aarch64")]

/// Largest vector element value.
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
#[allow(unused_imports)]
#[inline]
pub fn max(self) -> $elem_ty {
pub fn max_element(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use cmp::Ord;
Expand All @@ -31,22 +28,19 @@ macro_rules! impl_minmax_reductions {
x
}

/// Smallest vector value.
///
/// FIXME: document behavior for float vectors with NaNs.
#[cfg(not(target_arch = "aarch64"))]
/// Smallest vector element value.
#[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
#[inline]
pub fn min(self) -> $elem_ty {
pub fn min_element(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_min;
unsafe { simd_reduce_min(self) }
}
/// Smallest vector value.
///
/// FIXME: document behavior for float vectors with NaNs.
#[cfg(target_arch = "aarch64")]

/// Smallest vector element value.
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
#[allow(unused_imports)]
#[inline]
pub fn min(self) -> $elem_ty {
pub fn min_element(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use cmp::Ord;
Expand All @@ -65,29 +59,29 @@ macro_rules! impl_minmax_reductions {
macro_rules! test_minmax_reductions {
($id:ident, $elem_ty:ident) => {
#[test]
fn max() {
fn max_element() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.max(), 0 as $elem_ty);
assert_eq!(v.max_element(), 0 as $elem_ty);
let v = v.replace(1, 1 as $elem_ty);
assert_eq!(v.max(), 1 as $elem_ty);
assert_eq!(v.max_element(), 1 as $elem_ty);
let v = v.replace(0, 2 as $elem_ty);
assert_eq!(v.max(), 2 as $elem_ty);
assert_eq!(v.max_element(), 2 as $elem_ty);
}

#[test]
fn min() {
fn min_element() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.min(), 0 as $elem_ty);
assert_eq!(v.min_element(), 0 as $elem_ty);
let v = v.replace(1, 1 as $elem_ty);
assert_eq!(v.min(), 0 as $elem_ty);
assert_eq!(v.min_element(), 0 as $elem_ty);
let v = $id::splat(1 as $elem_ty);
let v = v.replace(0, 2 as $elem_ty);
assert_eq!(v.min(), 1 as $elem_ty);
assert_eq!(v.min_element(), 1 as $elem_ty);
let v = $id::splat(2 as $elem_ty);
let v = v.replace(1, 1 as $elem_ty);
assert_eq!(v.min(), 1 as $elem_ty);
assert_eq!(v.min_element(), 1 as $elem_ty);
}
};
}
49 changes: 49 additions & 0 deletions coresimd/ppsv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,52 @@ impl<T> FromBits<T> for T {
t
}
}

/// Works around code generation issues.
#[cfg(target_arch = "aarch64")]
mod codegen {
    /// Scalar `add`/`mul` helpers exposing one interface for both integer
    /// and floating-point element types.
    #[cfg(target_arch = "aarch64")]
    pub mod wrapping {
        /// Addition and multiplication of two scalars of the same type.
        ///
        /// Integer implementations forward to `wrapping_add` and
        /// `wrapping_mul`; floating-point implementations use the plain
        /// `+` and `*` operators.
        pub trait Wrapping {
            /// Returns the sum of `self` and `other`.
            fn add(self, other: Self) -> Self;
            /// Returns the product of `self` and `other`.
            fn mul(self, other: Self) -> Self;
        }

        // Implements `Wrapping` for a comma-separated list of primitive
        // types. The `int:` arm uses the wrapping integer methods, the
        // `float:` arm uses the ordinary arithmetic operators.
        macro_rules! impl_wrapping {
            (int: $($ty:ident),+) => {
                $(
                    impl Wrapping for $ty {
                        fn add(self, rhs: Self) -> Self {
                            self.wrapping_add(rhs)
                        }
                        fn mul(self, rhs: Self) -> Self {
                            self.wrapping_mul(rhs)
                        }
                    }
                )+
            };
            (float: $($ty:ident),+) => {
                $(
                    impl Wrapping for $ty {
                        fn add(self, rhs: Self) -> Self {
                            self + rhs
                        }
                        fn mul(self, rhs: Self) -> Self {
                            self * rhs
                        }
                    }
                )+
            };
        }

        impl_wrapping!(int: i8, i16, i32, i64, u8, u16, u32, u64);
        impl_wrapping!(float: f32, f64);
    }
}
Loading