@@ -1,6 +1,7 @@
 use crate::{
     core_arch::{simd::*, simd_llvm::*, x86::*},
     mem::{self, transmute},
+    ptr,
 };
 
 #[cfg(test)]
@@ -1633,6 +1634,98 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
     transmute(r)
 }
 
+/// Returns vector of type `__m512d` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_pd() -> __m512d {
+    // FIXME: this function should return MaybeUninit<__m512d>
+    mem::MaybeUninit::<__m512d>::uninit().assume_init()
+}
+
+/// Returns vector of type `__m512` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_ps() -> __m512 {
+    // FIXME: this function should return MaybeUninit<__m512>
+    mem::MaybeUninit::<__m512>::uninit().assume_init()
+}
+
+/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovupd))]
+pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
+    let mut dst = _mm512_undefined_pd();
+    ptr::copy_nonoverlapping(
+        mem_addr as *const u8,
+        &mut dst as *mut __m512d as *mut u8,
+        mem::size_of::<__m512d>(),
+    );
+    dst
+}
+
+/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovupd))]
+pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
+    ptr::copy_nonoverlapping(
+        &a as *const __m512d as *const u8,
+        mem_addr as *mut u8,
+        mem::size_of::<__m512d>(),
+    );
+}
+
+/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
+    let mut dst = _mm512_undefined_ps();
+    ptr::copy_nonoverlapping(
+        mem_addr as *const u8,
+        &mut dst as *mut __m512 as *mut u8,
+        mem::size_of::<__m512>(),
+    );
+    dst
+}
+
+/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
+    ptr::copy_nonoverlapping(
+        &a as *const __m512 as *const u8,
+        mem_addr as *mut u8,
+        mem::size_of::<__m512>(),
+    );
+}
+
+
 /// Equal
 pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
 /// Less-than
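The four intrinsics added above all take raw pointers and are gated on the `avx512f` target feature. As a usage illustration only, not part of the patch, here is a minimal sketch of calling the new `_mm512_loadu_pd`/`_mm512_storeu_pd` pair from a consumer crate on an x86_64 target with a runtime CPU check. The helper `copy8`, `main`, and the sample data are hypothetical, and these intrinsics were nightly-only when this change landed:

use std::arch::x86_64::{_mm512_loadu_pd, _mm512_storeu_pd};

/// Round-trips eight f64 values through one 512-bit register.
/// Neither pointer needs 64-byte alignment; that is exactly what the
/// unaligned `loadu`/`storeu` variants are for.
#[target_feature(enable = "avx512f")]
unsafe fn copy8(src: &[f64; 8], dst: &mut [f64; 8]) {
    let v = _mm512_loadu_pd(src.as_ptr());
    _mm512_storeu_pd(dst.as_mut_ptr(), v);
}

fn main() {
    let src = [4., 3., 2., 5., 8., 9., 64., 50.];
    let mut dst = [0.0f64; 8];
    // Only take the AVX-512 path when the CPU actually supports it.
    if is_x86_feature_detected!("avx512f") {
        unsafe { copy8(&src, &mut dst) };
        assert_eq!(src, dst);
    }
}
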
@@ -1702,6 +1795,8 @@ mod tests {
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
+    use crate::hint::black_box;
+    use crate::core_arch::x86_64::_mm512_setr_pd;
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_abs_epi32() {
@@ -2326,4 +2421,38 @@ mod tests {
     unsafe fn test_mm512_setzero_ps() {
         assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
     }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_loadu_pd() {
+        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
+        let p = a.as_ptr();
+        let r = _mm512_loadu_pd(black_box(p));
+        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_storeu_pd() {
+        let a = _mm512_set1_pd(9.);
+        let mut r = _mm512_undefined_pd();
+        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
+        assert_eq_m512d(r, a);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_loadu_ps() {
+        let a = &[4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.];
+        let p = a.as_ptr();
+        let r = _mm512_loadu_ps(black_box(p));
+        let e = _mm512_setr_ps(4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.);
+        assert_eq_m512(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_storeu_ps() {
+        let a = _mm512_set1_ps(9.);
+        let mut r = _mm512_undefined_ps();
+        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
+        assert_eq_m512(r, a);
+    }
 }
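
The new loads and stores are implemented by byte-copying through `ptr::copy_nonoverlapping`, with the loads writing into a vector obtained from `_mm512_undefined_pd()`/`_mm512_undefined_ps()`, and each intrinsic carries an `assert_instr(vmovupd)`/`assert_instr(vmovups)` annotation so the test build verifies the emitted instruction. For comparison only, the same unaligned access can also be expressed with `ptr::read_unaligned`/`ptr::write_unaligned`, which avoids the `assume_init` step; this is a sketch rather than what the patch does, the names `loadu_pd_alt`/`storeu_pd_alt` are hypothetical, and it again assumes a toolchain that exposes the AVX-512 vector types:

use core::ptr;
use std::arch::x86_64::__m512d;

/// Unaligned load expressed as a single unaligned read of the vector type.
#[target_feature(enable = "avx512f")]
unsafe fn loadu_pd_alt(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
}

/// Unaligned store expressed as a single unaligned write of the vector type.
#[target_feature(enable = "avx512f")]
unsafe fn storeu_pd_alt(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
}

Either formulation requires the caller to supply a pointer to at least 64 readable (or writable) bytes, and no particular alignment is required, which is what distinguishes these from Intel's aligned `_mm512_load_pd`/`_mm512_store_pd` counterparts (not touched by this patch).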