Skip to content

Commit abf64fb

Browse files
committed
Fixed UB in `_mm512_undefined_p{d,s}`; simplified `_mm512_loadu_p{d,s}` to use `ptr::read_unaligned`
1 parent eeadf05 commit abf64fb

File tree

1 file changed

+4
-18
lines changed

1 file changed

+4
-18
lines changed

crates/core_arch/src/x86/avx512f.rs

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,8 +1641,7 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
16411641
#[target_feature(enable = "avx512f")]
16421642
// This intrinsic has no corresponding instruction.
16431643
pub unsafe fn _mm512_undefined_pd() -> __m512d {
1644-
// FIXME: this function should return MaybeUninit<__m512d>
1645-
mem::MaybeUninit::<__m512d>::uninit().assume_init()
1644+
_mm512_set1_pd(0.0)
16461645
}
16471646

16481647
/// Returns vector of type `__m512` with undefined elements.
@@ -1652,8 +1651,7 @@ pub unsafe fn _mm512_undefined_pd() -> __m512d {
16521651
#[target_feature(enable = "avx512f")]
16531652
// This intrinsic has no corresponding instruction.
16541653
pub unsafe fn _mm512_undefined_ps() -> __m512 {
1655-
// FIXME: this function should return MaybeUninit<__m512>
1656-
mem::MaybeUninit::<__m512>::uninit().assume_init()
1654+
_mm512_set1_ps(0.0)
16571655
}
16581656

16591657
/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
@@ -1665,13 +1663,7 @@ pub unsafe fn _mm512_undefined_ps() -> __m512 {
16651663
#[target_feature(enable = "avx512f")]
16661664
#[cfg_attr(test, assert_instr(vmovupd))]
16671665
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
1668-
let mut dst = _mm512_undefined_pd();
1669-
ptr::copy_nonoverlapping(
1670-
mem_addr as *const u8,
1671-
&mut dst as *mut __m512d as *mut u8,
1672-
mem::size_of::<__m512d>(),
1673-
);
1674-
dst
1666+
ptr::read_unaligned(mem_addr as *const __m512d)
16751667
}
16761668

16771669
/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
@@ -1699,13 +1691,7 @@ pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
16991691
#[target_feature(enable = "avx512f")]
17001692
#[cfg_attr(test, assert_instr(vmovups))]
17011693
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
1702-
let mut dst = _mm512_undefined_ps();
1703-
ptr::copy_nonoverlapping(
1704-
mem_addr as *const u8,
1705-
&mut dst as *mut __m512 as *mut u8,
1706-
mem::size_of::<__m512>(),
1707-
);
1708-
dst
1694+
ptr::read_unaligned(mem_addr as *const __m512)
17091695
}
17101696

17111697
/// Stores 512-bits (composed of 16 packed single-precision (32-bit)

0 commit comments

Comments
 (0)