@@ -217,11 +217,27 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
217
217
return Cow :: Borrowed ( args) ;
218
218
}
219
219
220
+ let func_name = format ! ( "{:?}" , func_ptr) ;
221
+
220
222
let casted_args: Vec < _ > = param_types
221
223
. into_iter ( )
222
224
. zip ( args. iter ( ) )
223
225
. enumerate ( )
224
226
. map ( |( index, ( expected_ty, & actual_val) ) | {
227
+ // NOTE: these intrinsics have missing parameters before the last one, so ignore the
228
+ // last argument type check.
229
+ // FIXME(antoyo): find a way to refactor in order to avoid this hack.
230
+ match & * func_name {
231
+ "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
232
+ | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
233
+ | "__builtin_ia32_sqrtpd512_mask" => {
234
+ if index == args. len ( ) - 1 {
235
+ return actual_val;
236
+ }
237
+ } ,
238
+ _ => ( ) ,
239
+ }
240
+
225
241
let actual_ty = actual_val. get_type ( ) ;
226
242
if expected_ty != actual_ty {
227
243
if !actual_ty. is_vector ( ) && !expected_ty. is_vector ( ) && actual_ty. is_integral ( ) && expected_ty. is_integral ( ) && actual_ty. get_size ( ) != expected_ty. get_size ( ) {
@@ -286,7 +302,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
286
302
}
287
303
288
304
fn function_ptr_call ( & mut self , func_ptr : RValue < ' gcc > , args : & [ RValue < ' gcc > ] , _funclet : Option < & Funclet > ) -> RValue < ' gcc > {
289
- let args = self . check_ptr_call ( "call" , func_ptr, args) ;
305
+ let mut args = self . check_ptr_call ( "call" , func_ptr, args) ;
290
306
291
307
// gccjit requires the result of a function call to be used, even when the caller does not need it.
292
308
// That's why we assign the result to a local or call add_eval().
@@ -298,6 +314,92 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
298
314
if return_type != void_type {
299
315
unsafe { RETURN_VALUE_COUNT += 1 } ;
300
316
let result = current_func. new_local ( None , return_type, & format ! ( "ptrReturnValue{}" , unsafe { RETURN_VALUE_COUNT } ) ) ;
317
+ // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
318
+ // arguments here.
319
+ if gcc_func. get_param_count ( ) != args. len ( ) {
320
+ let func_name = format ! ( "{:?}" , func_ptr) ;
321
+ match & * func_name {
322
+ "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
323
+ // FIXME(antoyo): the following intrinsics have 4 (or 5) arguments according to the doc, but are defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
324
+ | "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
325
+ | "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
326
+ | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
327
+ | "__builtin_ia32_pmaxuq128_mask"
328
+ | "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
329
+ | "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
330
+ | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
331
+ | "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
332
+ => {
333
+ // TODO: refactor by separating those intrinsics outside of this branch.
334
+ let add_before_last_arg =
335
+ match & * func_name {
336
+ "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
337
+ | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
338
+ | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true ,
339
+ _ => false ,
340
+ } ;
341
+ let new_first_arg_is_zero =
342
+ match & * func_name {
343
+ "__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
344
+ | "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true ,
345
+ _ => false
346
+ } ;
347
+ let arg3_index =
348
+ match & * func_name {
349
+ "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1 ,
350
+ _ => 2 ,
351
+ } ;
352
+ let mut new_args = args. to_vec ( ) ;
353
+ let arg3_type = gcc_func. get_param_type ( arg3_index) ;
354
+ let first_arg =
355
+ if new_first_arg_is_zero {
356
+ let vector_type = arg3_type. dyncast_vector ( ) . expect ( "vector type" ) ;
357
+ let zero = self . context . new_rvalue_zero ( vector_type. get_element_type ( ) ) ;
358
+ let num_units = vector_type. get_num_units ( ) ;
359
+ self . context . new_rvalue_from_vector ( None , arg3_type, & vec ! [ zero; num_units] )
360
+ }
361
+ else {
362
+ self . current_func ( ) . new_local ( None , arg3_type, "undefined_for_intrinsic" ) . to_rvalue ( )
363
+ } ;
364
+ if add_before_last_arg {
365
+ new_args. insert ( new_args. len ( ) - 1 , first_arg) ;
366
+ }
367
+ else {
368
+ new_args. push ( first_arg) ;
369
+ }
370
+ let arg4_index =
371
+ match & * func_name {
372
+ "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2 ,
373
+ _ => 3 ,
374
+ } ;
375
+ let arg4_type = gcc_func. get_param_type ( arg4_index) ;
376
+ let minus_one = self . context . new_rvalue_from_int ( arg4_type, -1 ) ;
377
+ if add_before_last_arg {
378
+ new_args. insert ( new_args. len ( ) - 1 , minus_one) ;
379
+ }
380
+ else {
381
+ new_args. push ( minus_one) ;
382
+ }
383
+ args = new_args. into ( ) ;
384
+ } ,
385
+ "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
386
+ let mut new_args = args. to_vec ( ) ;
387
+ if args. len ( ) == 3 {
388
+ // Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 map to
389
+ // the same GCC intrinsic, but the former has 3 parameters and the
390
+ // latter has 4 so it doesn't require this additional argument.
391
+ let arg4_type = gcc_func. get_param_type ( 3 ) ;
392
+ let minus_one = self . context . new_rvalue_from_int ( arg4_type, -1 ) ;
393
+ new_args. push ( minus_one) ;
394
+ }
395
+
396
+ let arg5_type = gcc_func. get_param_type ( 4 ) ;
397
+ new_args. push ( self . context . new_rvalue_from_int ( arg5_type, 4 ) ) ;
398
+ args = new_args. into ( ) ;
399
+ } ,
400
+ _ => ( ) ,
401
+ }
402
+ }
301
403
self . block . add_assignment ( None , result, self . cx . context . new_call_through_ptr ( None , func_ptr, & args) ) ;
302
404
result. to_rvalue ( )
303
405
}
0 commit comments