@@ -188,6 +188,51 @@ pub unsafe fn _mm512_mask_i64gather_pd(
188
188
transmute ( r)
189
189
}
190
190
191
+ /// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices.
192
+ ///
193
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_ps)
194
+ #[ inline]
195
+ #[ target_feature( enable = "avx512f" ) ]
196
+ #[ cfg_attr( test, assert_instr( vgatherqps, scale = 1 ) ) ]
197
+ pub unsafe fn _mm512_i64gather_ps ( offsets : __m512i , slice : * const u8 , scale : i32 ) -> __m256 {
198
+ let zero = _mm256_setzero_ps ( ) . as_f32x8 ( ) ;
199
+ let neg_one = -1 ;
200
+ let slice = slice as * const i8 ;
201
+ let offsets = offsets. as_i64x8 ( ) ;
202
+ macro_rules! call {
203
+ ( $imm8: expr) => {
204
+ vgatherqps( zero, slice, offsets, neg_one, $imm8)
205
+ } ;
206
+ }
207
+ let r = constify_imm8 ! ( scale, call) ;
208
+ transmute ( r)
209
+ }
210
+
211
+ /// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices.
212
+ ///
213
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_ps)
214
+ #[ inline]
215
+ #[ target_feature( enable = "avx512f" ) ]
216
+ #[ cfg_attr( test, assert_instr( vgatherqps, scale = 1 ) ) ]
217
+ pub unsafe fn _mm512_mask_i64gather_ps (
218
+ src : __m256 ,
219
+ mask : __mmask8 ,
220
+ offsets : __m512i ,
221
+ slice : * const u8 ,
222
+ scale : i32 ,
223
+ ) -> __m256 {
224
+ let src = src. as_f32x8 ( ) ;
225
+ let slice = slice as * const i8 ;
226
+ let offsets = offsets. as_i64x8 ( ) ;
227
+ macro_rules! call {
228
+ ( $imm8: expr) => {
229
+ vgatherqps( src, slice, offsets, mask as i8 , $imm8)
230
+ } ;
231
+ }
232
+ let r = constify_imm8 ! ( scale, call) ;
233
+ transmute ( r)
234
+ }
235
+
191
236
/// Gather 64-bit integers from memory using 32-bit indices.
192
237
///
193
238
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi64)
@@ -280,16 +325,66 @@ pub unsafe fn _mm512_mask_i64gather_epi64(
280
325
transmute ( r)
281
326
}
282
327
328
+ /// Gather 32-bit integers from memory using 64-bit indices.
329
+ ///
330
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi64)
331
+ #[ inline]
332
+ #[ target_feature( enable = "avx512f" ) ]
333
+ #[ cfg_attr( test, assert_instr( vpgatherqd, scale = 1 ) ) ]
334
+ pub unsafe fn _mm512_i64gather_epi32 ( offsets : __m512i , slice : * const u8 , scale : i32 ) -> __m256i {
335
+ let zeros = _mm256_setzero_si256 ( ) . as_i32x8 ( ) ;
336
+ let neg_one = -1 ;
337
+ let slice = slice as * const i8 ;
338
+ let offsets = offsets. as_i64x8 ( ) ;
339
+ macro_rules! call {
340
+ ( $imm8: expr) => {
341
+ vpgatherqd( zeros, slice, offsets, neg_one, $imm8)
342
+ } ;
343
+ }
344
+ let r = constify_imm8 ! ( scale, call) ;
345
+ transmute ( r)
346
+ }
347
+
348
+ /// Gather 32-bit integers from memory using 64-bit indices.
349
+ ///
350
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi64)
351
+ #[ inline]
352
+ #[ target_feature( enable = "avx512f" ) ]
353
+ #[ cfg_attr( test, assert_instr( vpgatherqd, scale = 1 ) ) ]
354
+ pub unsafe fn _mm512_mask_i64gather_epi32 (
355
+ src : __m256i ,
356
+ mask : __mmask8 ,
357
+ offsets : __m512i ,
358
+ slice : * const u8 ,
359
+ scale : i32 ,
360
+ ) -> __m256i {
361
+ let src = src. as_i32x8 ( ) ;
362
+ let mask = mask as i8 ;
363
+ let slice = slice as * const i8 ;
364
+ let offsets = offsets. as_i64x8 ( ) ;
365
+ macro_rules! call {
366
+ ( $imm8: expr) => {
367
+ vpgatherqd( src, slice, offsets, mask, $imm8)
368
+ } ;
369
+ }
370
+ let r = constify_imm8 ! ( scale, call) ;
371
+ transmute ( r)
372
+ }
373
+
283
374
#[ allow( improper_ctypes) ]
284
375
extern "C" {
285
376
#[ link_name = "llvm.x86.avx512.gather.dpd.512" ]
286
377
fn vgatherdpd ( src : f64x8 , slice : * const i8 , offsets : i32x8 , mask : i8 , scale : i32 ) -> f64x8 ;
287
378
#[ link_name = "llvm.x86.avx512.gather.qpd.512" ]
288
379
fn vgatherqpd ( src : f64x8 , slice : * const i8 , offsets : i64x8 , mask : i8 , scale : i32 ) -> f64x8 ;
380
+ #[ link_name = "llvm.x86.avx512.gather.qps.512" ]
381
+ fn vgatherqps ( src : f32x8 , slice : * const i8 , offsets : i64x8 , mask : i8 , scale : i32 ) -> f32x8 ;
289
382
#[ link_name = "llvm.x86.avx512.gather.dpq.512" ]
290
383
fn vpgatherdq ( src : i64x8 , slice : * const i8 , offsets : i32x8 , mask : i8 , scale : i32 ) -> i64x8 ;
291
384
#[ link_name = "llvm.x86.avx512.gather.qpq.512" ]
292
385
fn vpgatherqq ( src : i64x8 , slice : * const i8 , offsets : i64x8 , mask : i8 , scale : i32 ) -> i64x8 ;
386
+ #[ link_name = "llvm.x86.avx512.gather.qpi.512" ]
387
+ fn vpgatherqd ( src : i32x8 , slice : * const i8 , offsets : i64x8 , mask : i8 , scale : i32 ) -> i32x8 ;
293
388
}
294
389
295
390
/// Broadcast 64-bit float `a` to all elements of `dst`.
0 commit comments