Commit dcdc645 (1 parent: 584035c)

x86_64 sysv64 vararg: support structs
compiler/rustc_codegen_llvm/src/va_arg.rs

Lines changed: 154 additions & 43 deletions
@@ -1,7 +1,10 @@
-use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size};
+use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
+use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::OperandRef;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
+};
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
 
@@ -300,11 +303,6 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     // } va_list[1];
     let va_list_addr = list.immediate();
 
-    let unsigned_int_offset = 4;
-    let ptr_offset = 8;
-    let gp_offset_ptr = va_list_addr;
-    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
-
     let layout = bx.cx.layout_of(target_ty);
 
     // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
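
Note: the `// } va_list[1];` context line above is the tail of a comment in this file spelling out the x86-64 SysV `va_list` record that all of the offsets below index into. For reference, a minimal Rust sketch of that layout (field names follow the AMD64 psABI §3.5.7; the type itself is illustrative, not part of the patch):

    // The x86-64 SysV va_list element; `unsigned_int_offset = 4` and
    // `ptr_offset = 8` in the code compute field offsets into this record.
    #[repr(C)]
    struct VaListSysV64 {
        gp_offset: u32,             // offset 0: bytes of reg_save_area used for GP args (0..=48)
        fp_offset: u32,             // offset 4: bytes used for SSE args (48..=176)
        overflow_arg_area: *mut u8, // offset 8: next stack-passed argument
        reg_save_area: *mut u8,     // offset 16: six 8-byte GP slots, then eight 16-byte SSE slots
    }
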
@@ -317,37 +315,48 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     let mut num_gp_registers = 0;
     let mut num_fp_registers = 0;
 
+    let mut registers_for_primitive = |p| match p {
+        Primitive::Int(integer, _is_signed) => {
+            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
+        }
+        Primitive::Float(float) => {
+            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
+        }
+        Primitive::Pointer(_) => {
+            num_gp_registers += 1;
+        }
+    };
+
     match layout.layout.backend_repr() {
-        BackendRepr::Scalar(scalar) => match scalar.primitive() {
-            Primitive::Int(integer, _is_signed) => {
-                num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
-            }
-            Primitive::Float(float) => {
-                num_fp_registers += float.size().bytes().div_ceil(16) as u32;
-            }
-            Primitive::Pointer(_) => {
-                num_gp_registers += 1;
-            }
-        },
-        BackendRepr::ScalarPair(..)
-        | BackendRepr::SimdVector { .. }
-        | BackendRepr::Memory { .. } => {
+        BackendRepr::Scalar(scalar) => {
+            registers_for_primitive(scalar.primitive());
+        }
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            registers_for_primitive(scalar1.primitive());
+            registers_for_primitive(scalar2.primitive());
+        }
+        BackendRepr::SimdVector { .. } => {
             // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
             unreachable!(
                 "No x86-64 SysV va_arg implementation for {:?}",
                 layout.layout.backend_repr()
             )
         }
+        BackendRepr::Memory { .. } => {
+            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
+        }
     };
 
-    if num_gp_registers == 0 && num_fp_registers == 0 {
-        unreachable!("VaArgSafe is not implemented for ZSTs")
-    }
-
     // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
     // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
     // l->fp_offset > 176 - num_fp * 16 go to step 7.
 
+    let unsigned_int_offset = 4;
+    let ptr_offset = 8;
+    let gp_offset_ptr = va_list_addr;
+    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+
     let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
     let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());
 
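The new `registers_for_primitive` closure counts one GP register per eightbyte of an integer or pointer scalar and one SSE register per float scalar (SSE slots in the register save area are 16 bytes wide, hence the `div_ceil(16)`); for a `ScalarPair`, both halves are counted. The Step 3 fit check that follows can be read as this standalone sketch (a hypothetical helper, assuming `num_gp <= 6` and `num_fp <= 8`, which holds for `VaArgSafe` types):

    // Hypothetical standalone version of the Step 3 test the emitted IR performs.
    fn fits_in_registers(gp_offset: u32, fp_offset: u32, num_gp: u32, num_fp: u32) -> bool {
        // 6 GP registers * 8 bytes end at offset 48; 8 SSE slots * 16 bytes end at 176.
        gp_offset <= 48 - num_gp * 8 && fp_offset <= 176 - num_fp * 16
    }

When the test fails, control transfers to the `in_mem` block, i.e. the Step 7 overflow-area path.
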
@@ -388,14 +397,85 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
         bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
     let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);
 
-    let reg_addr = if num_gp_registers > 0 && num_fp_registers > 0 {
-        unreachable!("instances of VaArgSafe cannot use both int and sse registers");
-    } else if num_gp_registers > 0 || num_fp_registers == 1 {
-        let gp_or_fp_offset = if num_gp_registers > 0 { gp_offset_v } else { fp_offset_v };
-        bx.gep(bx.type_i8(), reg_save_area_v, &[gp_or_fp_offset])
-    } else {
-        // assert_eq!(num_sse_registers, 2);
-        unreachable!("all instances of VaArgSafe have an alignment <= 8");
+    let reg_addr = match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => match scalar.primitive() {
+            Primitive::Int(_, _) | Primitive::Pointer(_) => {
+                let reg_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[gp_offset_v]);
+
+                // Copy into a temporary if the type is more aligned than the register save area.
+                copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+            }
+            Primitive::Float(_) => bx.gep(bx.type_i8(), reg_save_area_v, &[fp_offset_v]),
+        },
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
+            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);
+
+            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
+            let align_hi = layout.field(bx.cx, 1).layout.align().abi;
+
+            match (scalar1.primitive(), scalar2.primitive()) {
+                (Primitive::Float(_), Primitive::Float(_)) => {
+                    // SSE registers are spaced 16 bytes apart in the register save
+                    // area, we need to collect the two eightbytes together.
+                    // The ABI isn't explicit about this, but it seems reasonable
+                    // to assume that the slots are 16-byte aligned, since the stack is
+                    // naturally 16-byte aligned and the prologue is expected to store
+                    // all the SSE registers to the RSA.
+                    let reg_lo_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[fp_offset_v]);
+                    let reg_hi_addr = bx.gep(bx.type_i8(), reg_lo_addr, &[bx.const_i32(16)]);
+
+                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+                    let align = layout.layout.align().abi;
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let field0 = tmp;
+                    let field1 = bx.gep(bx.type_i8(), tmp, &[bx.const_i32(8)]);
+
+                    bx.store(reg_lo, field0, align);
+                    bx.store(reg_hi, field1, align);
+
+                    tmp
+                }
+                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
+                    let gp_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[gp_offset_v]);
+                    let fp_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[fp_offset_v]);
+
+                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
+                        Primitive::Float(_) => (fp_addr, gp_addr),
+                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
+                    };
+
+                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let field0 = tmp;
+                    let field1 = bx.gep(bx.type_i8(), tmp, &[bx.const_i32(8)]);
+
+                    bx.store(reg_lo, field0, layout.layout.align().abi);
+                    bx.store(reg_hi, field1, layout.layout.align().abi);
+
+                    tmp
+                }
+                (_, _) => {
+                    // Two integer/pointer values are just contiguous in memory.
+                    let reg_addr = bx.gep(bx.type_i8(), reg_save_area_v, &[gp_offset_v]);
+
+                    // Copy into a temporary if the type is more aligned than the register save area.
+                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+                }
+            }
+        }
+        BackendRepr::SimdVector { .. } => {
+            unreachable!("panics in the previous match on `backend_repr`")
+        }
+        BackendRepr::Memory { .. } => {
+            unreachable!("early returns in the previous match on `backend_repr`")
+        }
     };
 
     // AMD64-ABI 3.5.7p5: Step 5. Set:
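
The `ScalarPair` arms are what make two-eightbyte structs work: two SSE slots sit 16 bytes apart in the register save area and must be re-packed 8 bytes apart into a temporary; a mixed GP/SSE pair is re-packed the same way, in field order; and two GP slots are already contiguous, so at most an over-alignment copy is needed. Some illustrative types for the three arms (classifications assumed from the SysV rules, not taken from the patch):

    #[repr(C)] struct TwoFloats { x: f64, y: f64 } // SSE + SSE: slots 16 bytes apart, re-packed
    #[repr(C)] struct IntFloat { i: i64, f: f64 }  // INTEGER + SSE: one GP slot, one SSE slot
    #[repr(C)] struct TwoInts { a: i64, b: i64 }   // INTEGER + INTEGER: contiguous GP slots
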
@@ -416,9 +496,47 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     bx.br(end);
 
     bx.switch_to_block(in_mem);
+    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+    bx.br(end);
+
+    bx.switch_to_block(end);
+
+    let val_type = layout.llvm_type(bx);
+    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
 
-    let overflow_arg_area_ptr =
-        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset));
+    bx.load(val_type, val_addr, layout.align.abi)
+}
+
+/// Copy into a temporary if the type is more aligned than the register save area.
+fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    reg_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    if layout.layout.align.abi.bytes() > 8 {
+        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+        bx.memcpy(
+            tmp,
+            layout.layout.align.abi,
+            reg_addr,
+            Align::from_bytes(8).unwrap(),
+            bx.const_u32(layout.layout.size().bytes() as u32),
+            MemFlags::empty(),
+        );
+        tmp
+    } else {
+        reg_addr
+    }
+}
+
+fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    va_list_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));
 
     let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
     // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
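
`copy_to_temporary_if_more_aligned` exists because a slot in the register save area is only guaranteed 8-byte alignment, so a more-aligned type must be memcpy'd into a suitably aligned alloca before the final typed load. The extracted `x86_64_sysv64_va_arg_from_memory` then covers Steps 7 and 8; its overflow-area bookkeeping is equivalent to this sketch (illustrative integer arithmetic, where the real function emits LLVM IR instead):

    // Roughly what the emitted IR does to the overflow_arg_area pointer.
    fn take_from_overflow_area(area: &mut usize, size: usize, align: usize) -> usize {
        // Step 7: round up, needed only when the type's alignment exceeds 8 bytes.
        if align > 8 {
            *area = area.next_multiple_of(align);
        }
        let arg_addr = *area;
        // Step 8: advance past the argument, keeping the area 8-byte aligned.
        *area += size.next_multiple_of(8);
        arg_addr
    }
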
@@ -441,14 +559,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     let overflow_arg_area = bx.gep(bx.type_i8(), overflow_arg_area_v, &[offset]);
     bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);
 
-    bx.br(end);
-
-    bx.switch_to_block(end);
-
-    let val_type = layout.llvm_type(bx);
-    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
-
-    bx.load(val_type, val_addr, layout.align.abi)
+    mem_addr
 }
 
 fn emit_xtensa_va_arg<'ll, 'tcx>(