Skip to content

Commit eeadf05

Browse files
committed
added f32 and f64 unaligned stores and loads from avx512f set
1 parent a371069 commit eeadf05

File tree

2 files changed

+138
-0
lines changed

2 files changed

+138
-0
lines changed

crates/core_arch/src/x86/avx512f.rs

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::{
22
core_arch::{simd::*, simd_llvm::*, x86::*},
33
mem::{self, transmute},
4+
ptr,
45
};
56

67
#[cfg(test)]
@@ -1633,6 +1634,97 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
16331634
transmute(r)
16341635
}
16351636

1637+
/// Returns vector of type `__m512d` with undefined elements.
1638+
///
1639+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
1640+
#[inline]
1641+
#[target_feature(enable = "avx512f")]
1642+
// This intrinsic has no corresponding instruction.
1643+
pub unsafe fn _mm512_undefined_pd() -> __m512d {
1644+
// FIXME: this function should return MaybeUninit<__m512d>
1645+
mem::MaybeUninit::<__m512d>::uninit().assume_init()
1646+
}
1647+
1648+
/// Returns vector of type `__m512` with undefined elements.
1649+
///
1650+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
1651+
#[inline]
1652+
#[target_feature(enable = "avx512f")]
1653+
// This intrinsic has no corresponding instruction.
1654+
pub unsafe fn _mm512_undefined_ps() -> __m512 {
1655+
// FIXME: this function should return MaybeUninit<__m512>
1656+
mem::MaybeUninit::<__m512>::uninit().assume_init()
1657+
}
1658+
1659+
/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
1660+
/// floating-point elements) from memory into result.
1661+
/// `mem_addr` does not need to be aligned on any particular boundary.
1662+
///
1663+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
1664+
#[inline]
1665+
#[target_feature(enable = "avx512f")]
1666+
#[cfg_attr(test, assert_instr(vmovupd))]
1667+
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
1668+
let mut dst = _mm512_undefined_pd();
1669+
ptr::copy_nonoverlapping(
1670+
mem_addr as *const u8,
1671+
&mut dst as *mut __m512d as *mut u8,
1672+
mem::size_of::<__m512d>(),
1673+
);
1674+
dst
1675+
}
1676+
1677+
/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
1678+
/// floating-point elements) from `a` into memory.
1679+
/// `mem_addr` does not need to be aligned on any particular boundary.
1680+
///
1681+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
1682+
#[inline]
1683+
#[target_feature(enable = "avx512f")]
1684+
#[cfg_attr(test, assert_instr(vmovupd))]
1685+
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
1686+
ptr::copy_nonoverlapping(
1687+
&a as *const __m512d as *const u8,
1688+
mem_addr as *mut u8,
1689+
mem::size_of::<__m512d>(),
1690+
);
1691+
}
1692+
1693+
/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
1694+
/// floating-point elements) from memory into result.
1695+
/// `mem_addr` does not need to be aligned on any particular boundary.
1696+
///
1697+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
1698+
#[inline]
1699+
#[target_feature(enable = "avx512f")]
1700+
#[cfg_attr(test, assert_instr(vmovups))]
1701+
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
1702+
let mut dst = _mm512_undefined_ps();
1703+
ptr::copy_nonoverlapping(
1704+
mem_addr as *const u8,
1705+
&mut dst as *mut __m512 as *mut u8,
1706+
mem::size_of::<__m512>(),
1707+
);
1708+
dst
1709+
}
1710+
1711+
/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
1712+
/// floating-point elements) from `a` into memory.
1713+
/// `mem_addr` does not need to be aligned on any particular boundary.
1714+
///
1715+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
1716+
#[inline]
1717+
#[target_feature(enable = "avx512f")]
1718+
#[cfg_attr(test, assert_instr(vmovups))]
1719+
#[stable(feature = "simd_x86", since = "1.27.0")]
1720+
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
1721+
ptr::copy_nonoverlapping(
1722+
&a as *const __m512 as *const u8,
1723+
mem_addr as *mut u8,
1724+
mem::size_of::<__m512>(),
1725+
);
1726+
}
1727+
16361728
/// Equal
16371729
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
16381730
/// Less-than
@@ -1702,6 +1794,8 @@ mod tests {
17021794
use stdarch_test::simd_test;
17031795

17041796
use crate::core_arch::x86::*;
1797+
use crate::core_arch::x86_64::_mm512_setr_pd;
1798+
use crate::hint::black_box;
17051799

17061800
#[simd_test(enable = "avx512f")]
17071801
unsafe fn test_mm512_abs_epi32() {
@@ -2326,4 +2420,42 @@ mod tests {
23262420
unsafe fn test_mm512_setzero_ps() {
23272421
assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
23282422
}
2423+
2424+
#[simd_test(enable = "avx512f")]
2425+
unsafe fn test_mm512_loadu_pd() {
2426+
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
2427+
let p = a.as_ptr();
2428+
let r = _mm512_loadu_pd(black_box(p));
2429+
let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
2430+
assert_eq_m512d(r, e);
2431+
}
2432+
2433+
#[simd_test(enable = "avx512f")]
2434+
unsafe fn test_mm512_storeu_pd() {
2435+
let a = _mm512_set1_pd(9.);
2436+
let mut r = _mm512_undefined_pd();
2437+
_mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
2438+
assert_eq_m512d(r, a);
2439+
}
2440+
2441+
#[simd_test(enable = "avx512f")]
2442+
unsafe fn test_mm512_loadu_ps() {
2443+
let a = &[
2444+
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
2445+
];
2446+
let p = a.as_ptr();
2447+
let r = _mm512_loadu_ps(black_box(p));
2448+
let e = _mm512_setr_ps(
2449+
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
2450+
);
2451+
assert_eq_m512(r, e);
2452+
}
2453+
2454+
#[simd_test(enable = "avx512f")]
2455+
unsafe fn test_mm512_storeu_ps() {
2456+
let a = _mm512_set1_ps(9.);
2457+
let mut r = _mm512_undefined_ps();
2458+
_mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
2459+
assert_eq_m512(r, a);
2460+
}
23292461
}

crates/stdarch-verify/tests/x86-intel.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ fn verify_all_signatures() {
282282
"_mm_tzcnt_64",
283283
"_fxsave64",
284284
"_fxrstor64",
285+
"_mm512_undefined_ps",
286+
"_mm512_undefined_pd",
285287
];
286288
if !skip.contains(&rust.name) {
287289
println!(
@@ -625,6 +627,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
625627

626628
(&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {}
627629
(&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {}
630+
(&Type::MutPtr(&Type::PrimFloat(32)), "void*") => {}
631+
(&Type::MutPtr(&Type::PrimFloat(64)), "void*") => {}
628632
(&Type::MutPtr(&Type::PrimSigned(32)), "int*") => {}
629633
(&Type::MutPtr(&Type::PrimSigned(32)), "__int32*") => {}
630634
(&Type::MutPtr(&Type::PrimSigned(64)), "__int64*") => {}
@@ -646,6 +650,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
646650

647651
(&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {}
648652
(&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {}
653+
(&Type::ConstPtr(&Type::PrimFloat(32)), "void const*") => {}
654+
(&Type::ConstPtr(&Type::PrimFloat(64)), "void const*") => {}
649655
(&Type::ConstPtr(&Type::PrimSigned(32)), "int const*") => {}
650656
(&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*") => {}
651657
(&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}

0 commit comments

Comments
 (0)