Skip to content

Commit 79dee01

Browse files
author
Daniel Smith
committed
Add _mm512_mask_i32gather_epi64
1 parent 01102d7 commit 79dee01

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

crates/core_arch/src/x86/avx512f.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,32 @@ pub unsafe fn _mm512_i32gather_epi64(offsets: __m256i, slice: *const u8, scale:
107107
transmute(r)
108108
}
109109

110+
/// Gather 64-bit integers from memory using 32-bit indices.
111+
///
112+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi64)
113+
#[inline]
114+
#[target_feature(enable = "avx512f")]
115+
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
116+
pub unsafe fn _mm512_mask_i32gather_epi64(
117+
src: __m512i,
118+
mask: __mmask8,
119+
offsets: __m256i,
120+
slice: *const u8,
121+
scale: i32,
122+
) -> __m512i {
123+
let src = src.as_i64x8();
124+
let mask = mask as i8;
125+
let slice = slice as *const i8;
126+
let offsets = offsets.as_i32x8();
127+
macro_rules! call {
128+
($imm8:expr) => {
129+
vpgatherdq(src, slice, offsets, mask, $imm8)
130+
};
131+
}
132+
let r = constify_imm8!(scale, call);
133+
transmute(r)
134+
}
135+
110136
#[allow(improper_ctypes)]
111137
extern "C" {
112138
#[link_name = "llvm.x86.avx512.gather.dpq.512"]

crates/core_arch/src/x86_64/avx512f.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,6 @@ mod tests {
235235
assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
236236
}
237237

238-
// _mm512_i32gather_epi64(offsets: __m256i, slice: *const i64, scale: i32)
239238
#[simd_test(enable = "avx512f")]
240239
unsafe fn test_mm512_i32gather_epi64() {
241240
let mut arr = [0i64; 128];
@@ -248,4 +247,19 @@ mod tests {
248247
let r = _mm512_i32gather_epi64(index, arr.as_ptr() as *const u8, 8);
249248
assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
250249
}
250+
251+
#[simd_test(enable = "avx512f")]
252+
unsafe fn test_mm512_mask_i32gather_epi64() {
253+
let mut arr = [0i64; 128];
254+
for i in 0..128i64 {
255+
arr[i as usize] = i;
256+
}
257+
let src = _mm512_set1_epi64(2);
258+
let mask = 0b10101010;
259+
#[rustfmt::skip]
260+
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
261+
// A multiplier of 8 is word-addressing
262+
let r = _mm512_mask_i32gather_epi64(src, mask, index, arr.as_ptr() as *const u8, 8);
263+
assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
264+
}
251265
}

0 commit comments

Comments
 (0)