Skip to content

Commit 4b40ac7

Browse files
committed
Support more SIMD intrinsics and refactor argument adjustment
1 parent eba654c commit 4b40ac7

File tree

2 files changed

+194
-126
lines changed

2 files changed

+194
-126
lines changed

src/builder.rs

Lines changed: 12 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ use rustc_target::spec::{HasTargetSpec, Target};
4848

4949
use crate::common::{SignType, TypeReflection, type_is_pointer};
5050
use crate::context::CodegenCx;
51+
use crate::intrinsic::llvm;
5152
use crate::type_of::LayoutGccExt;
5253

5354
// TODO(antoyo)
@@ -224,18 +225,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
224225
.zip(args.iter())
225226
.enumerate()
226227
.map(|(index, (expected_ty, &actual_val))| {
227-
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
228-
// last argument type check.
229-
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
230-
match &*func_name {
231-
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
232-
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
233-
| "__builtin_ia32_sqrtpd512_mask" => {
234-
if index == args.len() - 1 {
235-
return actual_val;
236-
}
237-
},
238-
_ => (),
228+
if llvm::ignore_arg_cast(&func_name, index, args.len()) {
229+
return actual_val;
239230
}
240231

241232
let actual_ty = actual_val.get_type();
@@ -302,7 +293,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
302293
}
303294

304295
fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
305-
let mut args = self.check_ptr_call("call", func_ptr, args);
296+
let args = self.check_ptr_call("call", func_ptr, args);
306297

307298
// gccjit requires the result of a function call to be used, even when the caller ignores it.
308299
// That's why we assign the result to a local or call add_eval().
@@ -314,92 +305,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
314305
if return_type != void_type {
315306
unsafe { RETURN_VALUE_COUNT += 1 };
316307
let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
317-
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
318-
// arguments here.
319-
if gcc_func.get_param_count() != args.len() {
320-
let func_name = format!("{:?}", func_ptr);
321-
match &*func_name {
322-
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
323-
// FIXME(antoyo): the following intrinsics have 4 (or 5) arguments according to the doc, but are defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
324-
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
325-
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
326-
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
327-
| "__builtin_ia32_pmaxuq128_mask"
328-
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
329-
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
330-
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
331-
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
332-
=> {
333-
// TODO: refactor by separating those intrinsics outside of this branch.
334-
let add_before_last_arg =
335-
match &*func_name {
336-
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
337-
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
338-
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
339-
_ => false,
340-
};
341-
let new_first_arg_is_zero =
342-
match &*func_name {
343-
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
344-
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
345-
_ => false
346-
};
347-
let arg3_index =
348-
match &*func_name {
349-
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
350-
_ => 2,
351-
};
352-
let mut new_args = args.to_vec();
353-
let arg3_type = gcc_func.get_param_type(arg3_index);
354-
let first_arg =
355-
if new_first_arg_is_zero {
356-
let vector_type = arg3_type.dyncast_vector().expect("vector type");
357-
let zero = self.context.new_rvalue_zero(vector_type.get_element_type());
358-
let num_units = vector_type.get_num_units();
359-
self.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
360-
}
361-
else {
362-
self.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
363-
};
364-
if add_before_last_arg {
365-
new_args.insert(new_args.len() - 1, first_arg);
366-
}
367-
else {
368-
new_args.push(first_arg);
369-
}
370-
let arg4_index =
371-
match &*func_name {
372-
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
373-
_ => 3,
374-
};
375-
let arg4_type = gcc_func.get_param_type(arg4_index);
376-
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
377-
if add_before_last_arg {
378-
new_args.insert(new_args.len() - 1, minus_one);
379-
}
380-
else {
381-
new_args.push(minus_one);
382-
}
383-
args = new_args.into();
384-
},
385-
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
386-
let mut new_args = args.to_vec();
387-
if args.len() == 3 {
388-
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 map to
389-
// the same GCC intrinsic, but the former has 3 parameters and the
390-
// latter has 4 so it doesn't require this additional argument.
391-
let arg4_type = gcc_func.get_param_type(3);
392-
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
393-
new_args.push(minus_one);
394-
}
395-
396-
let arg5_type = gcc_func.get_param_type(4);
397-
new_args.push(self.context.new_rvalue_from_int(arg5_type, 4));
398-
args = new_args.into();
399-
},
400-
_ => (),
401-
}
402-
}
308+
let func_name = format!("{:?}", func_ptr);
309+
let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args, &func_name);
403310
self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
404311
result.to_rvalue()
405312
}
@@ -1514,11 +1421,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
15141421
self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
15151422
}
15161423

1517-
pub fn vector_reduce_fadd_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
1424+
pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
15181425
unimplemented!();
15191426
}
15201427

1521-
pub fn vector_reduce_fmul_fast(&mut self, acc: RValue<'gcc>, src: RValue<'gcc>) -> RValue<'gcc> {
1428+
pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
15221429
unimplemented!();
15231430
}
15241431

@@ -1553,6 +1460,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
15531460
let ones = vec![self.context.new_rvalue_one(element_type); num_units];
15541461
let ones = self.context.new_rvalue_from_vector(None, cond_type, &ones);
15551462
let inverted_masks = masks + ones;
1463+
// NOTE: in libgccjit, the type of else_val can sometimes differ from the type of then_val
1464+
// (e.g. a vector of int vs. a vector of int32_t), but both must have the same type for the
1465+
// AND operation below to work, hence the bitcast.
1466+
let else_val = self.context.new_bitcast(None, else_val, then_val.get_type());
15561467
let else_vals = inverted_masks & else_val;
15571468

15581469
then_vals | else_vals

0 commit comments

Comments
 (0)