@@ -143,6 +143,51 @@ pub unsafe fn _mm512_mask_i32gather_pd(
143
143
transmute ( r)
144
144
}
145
145
146
+ /// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices.
147
+ ///
148
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_pd)
149
+ #[ inline]
150
+ #[ target_feature( enable = "avx512f" ) ]
151
+ #[ cfg_attr( test, assert_instr( vgatherqpd, scale = 1 ) ) ]
152
+ pub unsafe fn _mm512_i64gather_pd ( offsets : __m512i , slice : * const u8 , scale : i32 ) -> __m512d {
153
+ let zero = _mm512_setzero_pd ( ) . as_f64x8 ( ) ;
154
+ let neg_one = -1 ;
155
+ let slice = slice as * const i8 ;
156
+ let offsets = offsets. as_i64x8 ( ) ;
157
+ macro_rules! call {
158
+ ( $imm8: expr) => {
159
+ vgatherqpd( zero, slice, offsets, neg_one, $imm8)
160
+ } ;
161
+ }
162
+ let r = constify_imm8 ! ( scale, call) ;
163
+ transmute ( r)
164
+ }
165
+
166
+ /// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices.
167
+ ///
168
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_pd)
169
+ #[ inline]
170
+ #[ target_feature( enable = "avx512f" ) ]
171
+ #[ cfg_attr( test, assert_instr( vgatherqpd, scale = 1 ) ) ]
172
+ pub unsafe fn _mm512_mask_i64gather_pd (
173
+ src : __m512d ,
174
+ mask : __mmask8 ,
175
+ offsets : __m512i ,
176
+ slice : * const u8 ,
177
+ scale : i32 ,
178
+ ) -> __m512d {
179
+ let src = src. as_f64x8 ( ) ;
180
+ let slice = slice as * const i8 ;
181
+ let offsets = offsets. as_i64x8 ( ) ;
182
+ macro_rules! call {
183
+ ( $imm8: expr) => {
184
+ vgatherqpd( src, slice, offsets, mask as i8 , $imm8)
185
+ } ;
186
+ }
187
+ let r = constify_imm8 ! ( scale, call) ;
188
+ transmute ( r)
189
+ }
190
+
146
191
/// Gather 64-bit integers from memory using 32-bit indices.
147
192
///
148
193
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi64)
@@ -189,12 +234,62 @@ pub unsafe fn _mm512_mask_i32gather_epi64(
189
234
transmute ( r)
190
235
}
191
236
237
+ /// Gather 64-bit integers from memory using 64-bit indices.
238
+ ///
239
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi64)
240
+ #[ inline]
241
+ #[ target_feature( enable = "avx512f" ) ]
242
+ #[ cfg_attr( test, assert_instr( vpgatherqq, scale = 1 ) ) ]
243
+ pub unsafe fn _mm512_i64gather_epi64 ( offsets : __m512i , slice : * const u8 , scale : i32 ) -> __m512i {
244
+ let zero = _mm512_setzero_si512 ( ) . as_i64x8 ( ) ;
245
+ let neg_one = -1 ;
246
+ let slice = slice as * const i8 ;
247
+ let offsets = offsets. as_i64x8 ( ) ;
248
+ macro_rules! call {
249
+ ( $imm8: expr) => {
250
+ vpgatherqq( zero, slice, offsets, neg_one, $imm8)
251
+ } ;
252
+ }
253
+ let r = constify_imm8 ! ( scale, call) ;
254
+ transmute ( r)
255
+ }
256
+
257
+ /// Gather 64-bit integers from memory using 64-bit indices.
258
+ ///
259
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi64)
260
+ #[ inline]
261
+ #[ target_feature( enable = "avx512f" ) ]
262
+ #[ cfg_attr( test, assert_instr( vpgatherqq, scale = 1 ) ) ]
263
+ pub unsafe fn _mm512_mask_i64gather_epi64 (
264
+ src : __m512i ,
265
+ mask : __mmask8 ,
266
+ offsets : __m512i ,
267
+ slice : * const u8 ,
268
+ scale : i32 ,
269
+ ) -> __m512i {
270
+ let src = src. as_i64x8 ( ) ;
271
+ let mask = mask as i8 ;
272
+ let slice = slice as * const i8 ;
273
+ let offsets = offsets. as_i64x8 ( ) ;
274
+ macro_rules! call {
275
+ ( $imm8: expr) => {
276
+ vpgatherqq( src, slice, offsets, mask, $imm8)
277
+ } ;
278
+ }
279
+ let r = constify_imm8 ! ( scale, call) ;
280
+ transmute ( r)
281
+ }
282
+
192
283
#[ allow( improper_ctypes) ]
193
284
extern "C" {
194
285
#[ link_name = "llvm.x86.avx512.gather.dpd.512" ]
195
286
fn vgatherdpd ( src : f64x8 , slice : * const i8 , offsets : i32x8 , mask : i8 , scale : i32 ) -> f64x8 ;
287
+ #[ link_name = "llvm.x86.avx512.gather.qpd.512" ]
288
+ fn vgatherqpd ( src : f64x8 , slice : * const i8 , offsets : i64x8 , mask : i8 , scale : i32 ) -> f64x8 ;
196
289
#[ link_name = "llvm.x86.avx512.gather.dpq.512" ]
197
290
fn vpgatherdq ( src : i64x8 , slice : * const i8 , offsets : i32x8 , mask : i8 , scale : i32 ) -> i64x8 ;
291
+ #[ link_name = "llvm.x86.avx512.gather.qpq.512" ]
292
+ fn vpgatherqq ( src : i64x8 , slice : * const i8 , offsets : i64x8 , mask : i8 , scale : i32 ) -> i64x8 ;
198
293
}
199
294
200
295
/// Broadcast 64-bit float `a` to all elements of `dst`.
0 commit comments