
Commit 314b5f6

x86_64 sysv64 vararg: support structs
1 parent 2c3b12f commit 314b5f6
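
For orientation, here is a minimal sketch (not part of the commit) of the kind of call site this change is meant to make work: a `c_variadic` function fetching a small struct from a C-style argument list. The `Pair` type, `sum_pairs`, and the assumption that `VaArgSafe` is (or will be) implemented for such a user-defined struct are all illustrative; the commit itself only changes the x86_64 SysV `va_arg` lowering in rustc_codegen_llvm.

#![feature(c_variadic)]

// One integer eightbyte plus one SSE eightbyte: a ScalarPair in codegen terms.
#[repr(C)]
#[derive(Copy, Clone)]
struct Pair {
    a: i64,
    b: f64,
}

// Hypothetical variadic callee. With struct support in the va_arg lowering,
// `a` comes from the GP part of the register save area and `b` from the SSE
// part (or both from the overflow area once registers are exhausted).
unsafe extern "C" fn sum_pairs(count: usize, mut args: ...) -> f64 {
    let mut total = 0.0;
    for _ in 0..count {
        // Requires `Pair: VaArgSafe`, which is assumed here, not provided by this commit.
        let p: Pair = unsafe { args.arg() };
        total += p.a as f64 + p.b;
    }
    total
}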

File tree: 1 file changed, +166 -44 lines changed

compiler/rustc_codegen_llvm/src/va_arg.rs

Lines changed: 166 additions & 44 deletions
@@ -1,7 +1,10 @@
-use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size};
+use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
+use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::OperandRef;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
+};
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
 
@@ -325,12 +328,16 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     // } va_list[1];
     let va_list_addr = list.immediate();
 
-    let unsigned_int_offset = 4;
-    let ptr_offset = 8;
-    let gp_offset_ptr = va_list_addr;
-    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+    // Peel off any newtype wrappers.
+    let layout = {
+        let mut layout = bx.cx.layout_of(target_ty);
 
-    let layout = bx.cx.layout_of(target_ty);
+        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
+            layout = inner;
+        }
+
+        layout
+    };
 
     // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
     // in the registers. If not go to step 7.
@@ -342,37 +349,48 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     let mut num_gp_registers = 0;
     let mut num_fp_registers = 0;
 
+    let mut registers_for_primitive = |p| match p {
+        Primitive::Int(integer, _is_signed) => {
+            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
+        }
+        Primitive::Float(float) => {
+            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
+        }
+        Primitive::Pointer(_) => {
+            num_gp_registers += 1;
+        }
+    };
+
     match layout.layout.backend_repr() {
-        BackendRepr::Scalar(scalar) => match scalar.primitive() {
-            Primitive::Int(integer, _is_signed) => {
-                num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
-            }
-            Primitive::Float(float) => {
-                num_fp_registers += float.size().bytes().div_ceil(16) as u32;
-            }
-            Primitive::Pointer(_) => {
-                num_gp_registers += 1;
-            }
-        },
-        BackendRepr::ScalarPair(..)
-        | BackendRepr::SimdVector { .. }
-        | BackendRepr::Memory { .. } => {
+        BackendRepr::Scalar(scalar) => {
+            registers_for_primitive(scalar.primitive());
+        }
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            registers_for_primitive(scalar1.primitive());
+            registers_for_primitive(scalar2.primitive());
+        }
+        BackendRepr::SimdVector { .. } => {
             // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
             unreachable!(
                 "No x86-64 SysV va_arg implementation for {:?}",
                 layout.layout.backend_repr()
            )
         }
+        BackendRepr::Memory { .. } => {
+            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
+        }
     };
 
-    if num_gp_registers == 0 && num_fp_registers == 0 {
-        unreachable!("VaArgSafe is not implemented for ZSTs")
-    }
-
     // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
     // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
     // l->fp_offset > 176 - num_fp * 16 go to step 7.
 
+    let unsigned_int_offset = 4;
+    let ptr_offset = 8;
+    let gp_offset_ptr = va_list_addr;
+    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+
     let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
     let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());
 
@@ -413,14 +431,87 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
         bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
     let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);
 
-    let reg_addr = if num_gp_registers > 0 && num_fp_registers > 0 {
-        unreachable!("instances of VaArgSafe cannot use both int and sse registers");
-    } else if num_gp_registers > 0 || num_fp_registers == 1 {
-        let gp_or_fp_offset = if num_gp_registers > 0 { gp_offset_v } else { fp_offset_v };
-        bx.gep(bx.type_i8(), reg_save_area_v, &[gp_or_fp_offset])
-    } else {
-        // assert_eq!(num_sse_registers, 2);
-        unreachable!("all instances of VaArgSafe have an alignment <= 8");
+    let reg_addr = match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => match scalar.primitive() {
+            Primitive::Int(_, _) | Primitive::Pointer(_) => {
+                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                // Copy into a temporary if the type is more aligned than the register save area.
+                copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+            }
+            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
+        },
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
+            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);
+
+            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
+            let align_hi = layout.field(bx.cx, 1).layout.align().abi;
+
+            match (scalar1.primitive(), scalar2.primitive()) {
+                (Primitive::Float(_), Primitive::Float(_)) => {
+                    // SSE registers are spaced 16 bytes apart in the register save
+                    // area, we need to collect the two eightbytes together.
+                    // The ABI isn't explicit about this, but it seems reasonable
+                    // to assume that the slots are 16-byte aligned, since the stack is
+                    // naturally 16-byte aligned and the prologue is expected to store
+                    // all the SSE registers to the RSA.
+                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));
+
+                    let align = layout.layout.align().abi;
+                    let tmp = bx.alloca(layout.layout.size(), align);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align);
+                    bx.store(reg_hi, field1, align);
+
+                    tmp
+                }
+                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
+                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+
+                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
+                        Primitive::Float(_) => (fp_addr, gp_addr),
+                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
+                    };
+
+                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align_lo);
+                    bx.store(reg_hi, field1, align_hi);
+
+                    tmp
+                }
+                (_, _) => {
+                    // Two integer/pointer values are just contiguous in memory.
+                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                    // Copy into a temporary if the type is more aligned than the register save area.
+                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout)
+                }
+            }
+        }
+        BackendRepr::SimdVector { .. } => {
+            unreachable!("panics in the previous match on `backend_repr`")
+        }
+        BackendRepr::Memory { .. } => {
+            unreachable!("early returns in the previous match on `backend_repr`")
+        }
     };
 
     // AMD64-ABI 3.5.7p5: Step 5. Set:
@@ -441,9 +532,47 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     bx.br(end);
 
     bx.switch_to_block(in_mem);
+    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+    bx.br(end);
 
-    let overflow_arg_area_ptr =
-        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset));
+    bx.switch_to_block(end);
+
+    let val_type = layout.llvm_type(bx);
+    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
+
+    bx.load(val_type, val_addr, layout.align.abi)
+}
+
+/// Copy into a temporary if the type is more aligned than the register save area.
+fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    reg_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    if layout.layout.align.abi.bytes() > 8 {
+        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+        bx.memcpy(
+            tmp,
+            layout.layout.align.abi,
+            reg_addr,
+            Align::from_bytes(8).unwrap(),
+            bx.const_u32(layout.layout.size().bytes() as u32),
+            MemFlags::empty(),
+        );
+        tmp
+    } else {
+        reg_addr
+    }
+}
+
+fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    va_list_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));
 
     let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
     // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
@@ -463,17 +592,10 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     // an 8 byte boundary.
     let size_in_bytes = layout.layout.size().bytes();
     let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
-    let overflow_arg_area = bx.gep(bx.type_i8(), overflow_arg_area_v, &[offset]);
+    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
     bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);
 
-    bx.br(end);
-
-    bx.switch_to_block(end);
-
-    let val_type = layout.llvm_type(bx);
-    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
-
-    bx.load(val_type, val_addr, layout.align.abi)
+    mem_addr
 }
 
 fn emit_xtensa_va_arg<'ll, 'tcx>(
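
As a reader aid (not part of the commit): the byte offsets hard-coded in the diff (`unsigned_int_offset = 4`, `ptr_offset = 8`, register save area pointer at `2 * 4 + 8 = 16`) correspond to the x86-64 SysV `va_list` record, and the `div_ceil` calls count eightbytes the way the ABI classifies them. A rough sketch follows; `VaListSysV64` and `_eightbyte_examples` are illustrative names, with field names taken from the ABI document rather than from this file.

// x86-64 SysV va_list, as navigated by emit_x86_64_sysv64_va_arg above.
#[repr(C)]
struct VaListSysV64 {
    /// Offset 0: byte offset of the next unused GP slot in reg_save_area (0..=48).
    gp_offset: u32,
    /// Offset 4: byte offset of the next unused SSE slot (48..=176, slots 16 bytes apart).
    fp_offset: u32,
    /// Offset 8: stack area used once the matching register class is exhausted.
    overflow_arg_area: *mut u8,
    /// Offset 16: where the prologue spilled rdi..r9 and xmm0..xmm7.
    reg_save_area: *mut u8,
}

// Eightbyte counting as done by `registers_for_primitive`: an i128 needs
// ceil(16 / 8) = 2 GP registers, an f64 needs ceil(8 / 16) = 1 SSE slot, so a
// { i64, f64 } pair consumes one GP and one SSE slot of the register save area.
fn _eightbyte_examples() {
    assert_eq!(16u64.div_ceil(8), 2); // i128 -> two GP eightbytes
    assert_eq!(8u64.div_ceil(16), 1); // f64  -> one SSE slot
}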
