Skip to content

added f32 and f64 unaligned stores and loads from avx512f set #873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions crates/core_arch/src/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2956,8 +2956,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_ps() -> __m256 {
// FIXME: this function should return MaybeUninit<__m256>
mem::MaybeUninit::<__m256>::uninit().assume_init()
_mm256_set1_ps(0.0)
}

/// Returns vector of type `__m256d` with undefined elements.
Expand All @@ -2968,8 +2967,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_pd() -> __m256d {
// FIXME: this function should return MaybeUninit<__m256d>
mem::MaybeUninit::<__m256d>::uninit().assume_init()
_mm256_set1_pd(0.0)
}

/// Returns vector of type __m256i with undefined elements.
Expand Down
159 changes: 159 additions & 0 deletions crates/core_arch/src/x86/avx512f.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{
core_arch::{simd::*, simd_llvm::*, x86::*},
mem::{self, transmute},
ptr,
};

#[cfg(test)]
Expand Down Expand Up @@ -1633,6 +1634,113 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
transmute(r)
}

/// Returns vector of type `__m512d` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
#[inline]
#[target_feature(enable = "avx512f")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm512_undefined_pd() -> __m512d {
_mm512_set1_pd(0.0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a zero float. I think it should be literal zero bytes. Not sure though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A zero float happens to be encoded with all bits zeroed.

}

/// Returns vector of type `__m512` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
#[inline]
#[target_feature(enable = "avx512f")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm512_undefined_ps() -> __m512 {
_mm512_set1_ps(0.0)
}

/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
ptr::read_unaligned(mem_addr as *const __m512d)
}

/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
ptr::write_unaligned(mem_addr as *mut __m512d, a);
}

/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
ptr::read_unaligned(mem_addr as *const __m512)
}

/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
ptr::write_unaligned(mem_addr as *mut __m512, a);
}

/// Sets packed 64-bit integers in `dst` with the supplied values in
/// reverse order.
///
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_pd(
e0: f64,
e1: f64,
e2: f64,
e3: f64,
e4: f64,
e5: f64,
e6: f64,
e7: f64,
) -> __m512d {
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
transmute(r)
}

/// Sets packed 64-bit integers in `dst` with the supplied values.
///
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_pd(
e0: f64,
e1: f64,
e2: f64,
e3: f64,
e4: f64,
e5: f64,
e6: f64,
e7: f64,
) -> __m512d {
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
}

/// Equal
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
/// Less-than
Expand Down Expand Up @@ -1702,6 +1810,7 @@ mod tests {
use stdarch_test::simd_test;

use crate::core_arch::x86::*;
use crate::hint::black_box;

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_abs_epi32() {
Expand Down Expand Up @@ -2326,4 +2435,54 @@ mod tests {
unsafe fn test_mm512_setzero_ps() {
assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_pd() {
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
let p = a.as_ptr();
let r = _mm512_loadu_pd(black_box(p));
let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
assert_eq_m512d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_pd() {
let a = _mm512_set1_pd(9.);
let mut r = _mm512_undefined_pd();
_mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
assert_eq_m512d(r, a);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_ps() {
let a = &[
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
];
let p = a.as_ptr();
let r = _mm512_loadu_ps(black_box(p));
let e = _mm512_setr_ps(
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
);
assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_ps() {
let a = _mm512_set1_ps(9.);
let mut r = _mm512_undefined_ps();
_mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
assert_eq_m512(r, a);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_pd() {
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_pd() {
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
}
3 changes: 1 addition & 2 deletions crates/core_arch/src/x86/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1865,8 +1865,7 @@ pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_ps() -> __m128 {
// FIXME: this function should return MaybeUninit<__m128>
mem::MaybeUninit::<__m128>::uninit().assume_init()
_mm_set1_ps(0.0)
}

/// Transpose the 4x4 matrix formed by 4 rows of __m128 in place.
Expand Down
50 changes: 0 additions & 50 deletions crates/core_arch/src/x86_64/avx512f.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,6 @@ use crate::{
mem::transmute,
};

/// Sets packed 64-bit integers in `dst` with the supplied values.
///
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_pd(
e0: f64,
e1: f64,
e2: f64,
e3: f64,
e4: f64,
e5: f64,
e6: f64,
e7: f64,
) -> __m512d {
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
}

/// Sets packed 64-bit integers in `dst` with the supplied values in
/// reverse order.
///
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_pd(
e0: f64,
e1: f64,
e2: f64,
e3: f64,
e4: f64,
e5: f64,
e6: f64,
e7: f64,
) -> __m512d {
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
transmute(r)
}

/// Sets packed 64-bit integers in `dst` with the supplied values.
///
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
Expand Down Expand Up @@ -311,18 +273,6 @@ mod tests {
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_pd() {
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_pd() {
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi64() {
let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
Expand Down
6 changes: 6 additions & 0 deletions crates/stdarch-verify/tests/x86-intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ fn verify_all_signatures() {
"_mm_tzcnt_64",
"_fxsave64",
"_fxrstor64",
"_mm512_undefined_ps",
"_mm512_undefined_pd",
];
if !skip.contains(&rust.name) {
println!(
Expand Down Expand Up @@ -625,6 +627,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),

(&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {}
(&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {}
(&Type::MutPtr(&Type::PrimFloat(32)), "void*") => {}
(&Type::MutPtr(&Type::PrimFloat(64)), "void*") => {}
(&Type::MutPtr(&Type::PrimSigned(32)), "int*") => {}
(&Type::MutPtr(&Type::PrimSigned(32)), "__int32*") => {}
(&Type::MutPtr(&Type::PrimSigned(64)), "__int64*") => {}
Expand All @@ -646,6 +650,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),

(&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {}
(&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {}
(&Type::ConstPtr(&Type::PrimFloat(32)), "void const*") => {}
(&Type::ConstPtr(&Type::PrimFloat(64)), "void const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(32)), "int const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}
Expand Down