Skip to content

Commit 6bfe2b0

Browse files
committed
Support more SIMD intrinsics
1 parent ace3250 commit 6bfe2b0

File tree

5 files changed

+141
-5
lines changed

5 files changed

+141
-5
lines changed

src/builder.rs

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,11 +217,27 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
217217
return Cow::Borrowed(args);
218218
}
219219

220+
let func_name = format!("{:?}", func_ptr);
221+
220222
let casted_args: Vec<_> = param_types
221223
.into_iter()
222224
.zip(args.iter())
223225
.enumerate()
224226
.map(|(index, (expected_ty, &actual_val))| {
227+
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
228+
// last argument type check.
229+
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
230+
match &*func_name {
231+
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
232+
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
233+
| "__builtin_ia32_sqrtpd512_mask" => {
234+
if index == args.len() - 1 {
235+
return actual_val;
236+
}
237+
},
238+
_ => (),
239+
}
240+
225241
let actual_ty = actual_val.get_type();
226242
if expected_ty != actual_ty {
227243
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
@@ -286,7 +302,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
286302
}
287303

288304
fn function_ptr_call(&mut self, func_ptr: RValue<'gcc>, args: &[RValue<'gcc>], _funclet: Option<&Funclet>) -> RValue<'gcc> {
289-
let args = self.check_ptr_call("call", func_ptr, args);
305+
let mut args = self.check_ptr_call("call", func_ptr, args);
290306

291307
// gccjit requires the result of a function call to be used, even when the caller ignores it.
292308
// That's why we assign the result to a local or call add_eval().
@@ -298,6 +314,92 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
298314
if return_type != void_type {
299315
unsafe { RETURN_VALUE_COUNT += 1 };
300316
let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
317+
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
318+
// arguments here.
319+
if gcc_func.get_param_count() != args.len() {
320+
let func_name = format!("{:?}", func_ptr);
321+
match &*func_name {
322+
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
323+
// FIXME(antoyo): the following intrinsics have 4 (or 5) arguments according to the doc, but are defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
324+
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
325+
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
326+
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
327+
| "__builtin_ia32_pmaxuq128_mask"
328+
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
329+
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
330+
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
331+
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
332+
=> {
333+
// TODO: refactor by separating those intrinsics outside of this branch.
334+
let add_before_last_arg =
335+
match &*func_name {
336+
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
337+
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
338+
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
339+
_ => false,
340+
};
341+
let new_first_arg_is_zero =
342+
match &*func_name {
343+
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
344+
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
345+
_ => false
346+
};
347+
let arg3_index =
348+
match &*func_name {
349+
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
350+
_ => 2,
351+
};
352+
let mut new_args = args.to_vec();
353+
let arg3_type = gcc_func.get_param_type(arg3_index);
354+
let first_arg =
355+
if new_first_arg_is_zero {
356+
let vector_type = arg3_type.dyncast_vector().expect("vector type");
357+
let zero = self.context.new_rvalue_zero(vector_type.get_element_type());
358+
let num_units = vector_type.get_num_units();
359+
self.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
360+
}
361+
else {
362+
self.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
363+
};
364+
if add_before_last_arg {
365+
new_args.insert(new_args.len() - 1, first_arg);
366+
}
367+
else {
368+
new_args.push(first_arg);
369+
}
370+
let arg4_index =
371+
match &*func_name {
372+
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
373+
_ => 3,
374+
};
375+
let arg4_type = gcc_func.get_param_type(arg4_index);
376+
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
377+
if add_before_last_arg {
378+
new_args.insert(new_args.len() - 1, minus_one);
379+
}
380+
else {
381+
new_args.push(minus_one);
382+
}
383+
args = new_args.into();
384+
},
385+
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
386+
let mut new_args = args.to_vec();
387+
if args.len() == 3 {
388+
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmaddsub.ps.512 map to
389+
// the same GCC intrinsic, but the former has 3 parameters and the
390+
// latter has 4 so it doesn't require this additional argument.
391+
let arg4_type = gcc_func.get_param_type(3);
392+
let minus_one = self.context.new_rvalue_from_int(arg4_type, -1);
393+
new_args.push(minus_one);
394+
}
395+
396+
let arg5_type = gcc_func.get_param_type(4);
397+
new_args.push(self.context.new_rvalue_from_int(arg5_type, 4));
398+
args = new_args.into();
399+
},
400+
_ => (),
401+
}
402+
}
301403
self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
302404
result.to_rvalue()
303405
}

src/context.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
3535
pub normal_function_addresses: RefCell<FxHashSet<RValue<'gcc>>>,
3636

3737
pub functions: RefCell<FxHashMap<String, Function<'gcc>>>,
38+
pub intrinsics: RefCell<FxHashMap<String, Function<'gcc>>>,
3839

3940
pub tls_model: gccjit::TlsModel,
4041

@@ -184,6 +185,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
184185
current_func: RefCell::new(None),
185186
normal_function_addresses: Default::default(),
186187
functions: RefCell::new(functions),
188+
intrinsics: RefCell::new(FxHashMap::default()),
187189

188190
tls_model,
189191

@@ -315,8 +317,16 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
315317
}
316318

317319
fn get_fn_addr(&self, instance: Instance<'tcx>) -> RValue<'gcc> {
318-
let func = get_fn(self, instance);
319-
let func = self.rvalue_as_function(func);
320+
let func_name = self.tcx.symbol_name(instance).name;
321+
322+
let func =
323+
if self.intrinsics.borrow().contains_key(func_name) {
324+
self.intrinsics.borrow()[func_name].clone()
325+
}
326+
else {
327+
let func = get_fn(self, instance);
328+
self.rvalue_as_function(func)
329+
};
320330
let ptr = func.get_address(None);
321331

322332
// TODO(antoyo): don't do this twice: i.e. in declare_fn and here.

src/declare.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::intrinsic::llvm;
1111
impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
1212
pub fn get_or_insert_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> {
1313
if self.globals.borrow().contains_key(name) {
14+
// TODO: use [] instead of .get().expect()?
1415
let typ = self.globals.borrow().get(name).expect("global").get_type();
1516
let global = self.context.new_global(None, GlobalKind::Imported, typ, name);
1617
if is_tls {
@@ -103,7 +104,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
103104
/// update the declaration and return existing Value instead.
104105
fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*llvm::CallConv*/, return_type: Type<'gcc>, param_types: &[Type<'gcc>], variadic: bool) -> Function<'gcc> {
105106
if name.starts_with("llvm.") {
106-
return llvm::intrinsic(name, cx);
107+
let intrinsic = llvm::intrinsic(name, cx);
108+
cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
109+
return intrinsic;
107110
}
108111
let func =
109112
if cx.functions.borrow().contains_key(name) {

src/intrinsic/archs.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4275,5 +4275,8 @@ match name {
42754275
"llvm.xcore.getid" => "__builtin_getid",
42764276
"llvm.xcore.getps" => "__builtin_getps",
42774277
"llvm.xcore.setps" => "__builtin_setps",
4278-
_ => unimplemented!("***** unsupported LLVM intrinsic {}", name),
4278+
_ => {
4279+
println!("***** unsupported LLVM intrinsic {}", name);
4280+
""
4281+
},
42794282
}

src/intrinsic/llvm.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,24 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
2121
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
2222
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
2323
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
24+
"llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask",
25+
"llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask",
26+
"llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask",
27+
"llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask",
28+
"llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask",
29+
"llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask",
30+
"llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask",
31+
"llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask",
32+
"llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask",
33+
"llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask",
34+
"llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask",
35+
"llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask",
36+
"llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask",
37+
"llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
38+
"llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
39+
"llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
40+
"llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddps512_mask",
41+
"llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
2442

2543
// The above doc points to unknown builtins for the following, so override them:
2644
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",

0 commit comments

Comments
 (0)