Skip to content

[AArch64][GlobalISel] Implement selectVaStartAAPCS #106979

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 100 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1994,7 +1994,106 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(

/// Select a va_start instruction for targets that use the AAPCS64 va_list
/// structure.
///
/// Operand 0 of \p I is the address of the va_list object. The instruction is
/// replaced, in place, by five stores that fill in the fields of that object in
/// declaration order: three frame addresses (stack, gr_top, vr_top) followed by
/// two 32-bit offsets (gr_offs, vr_offs).
///
/// \param I   The va_start instruction to select; erased on success.
/// \param MF  The enclosing function, used to query the calling convention and
///            the vararg save-area bookkeeping in AArch64FunctionInfo.
/// \param MRI Used to create the temporary virtual registers.
/// \returns false (emitting nothing) for Win64 calling conventions, true
///          otherwise.
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  // Win64 variadic functions are not handled by this routine.
  if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
                             MF.getFunction().isVarArg()))
    return false;

  // The layout of the va_list struct is specified in the AArch64 Procedure Call
  // Standard, section 10.1.5.

  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // Pointer-sized fields are 4 bytes wide on ILP32 and 8 bytes otherwise; the
  // register class and the add/store opcodes are picked to match.
  const bool IsILP32 = STI.isTargetILP32();
  const unsigned PtrSize = IsILP32 ? 4 : 8;
  const auto *PtrRegClass =
      IsILP32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;

  const MCInstrDesc &MCIDAddAddr =
      TII.get(IsILP32 ? AArch64::ADDWri : AArch64::ADDXri);
  const MCInstrDesc &MCIDStoreAddr =
      TII.get(IsILP32 ? AArch64::STRWui : AArch64::STRXui);

  /*
   * typedef struct va_list {
   *   void * stack;   // next stack param
   *   void * gr_top;  // end of GP arg reg save area
   *   void * vr_top;  // end of FP/SIMD arg reg save area
   *   int gr_offs;    // offset from gr_top to next GP register arg
   *   int vr_offs;    // offset from vr_top to next FP/SIMD register arg
   * } va_list;
   */
  const auto VAList = I.getOperand(0).getReg();

  // Our current offset in bytes from the va_list struct (VAList).
  unsigned OffsetBytes = 0;

  // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
  // and increment OffsetBytes by PtrSize.
  const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
    // Top = FrameIndex + Imm (the trailing 0 is the shift operand).
    const Register Top = MRI.createVirtualRegister(PtrRegClass);
    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
                   .addDef(Top)
                   .addFrameIndex(FrameIndex)
                   .addImm(Imm)
                   .addImm(0);
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

    // NOTE(review): assumes I carries at least one memory operand; confirm
    // that every producer of this instruction attaches one.
    const auto *MMO = *I.memoperands_begin();
    // The store immediate is expressed in units of the access size, hence the
    // division. The new memory operand describes just this field: the original
    // pointer info shifted by OffsetBytes, with the original base alignment.
    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
              .addUse(Top)
              .addUse(VAList)
              .addImm(OffsetBytes / PtrSize)
              .addMemOperand(MF.getMachineMemOperand(
                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
                  MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

    OffsetBytes += PtrSize;
  };

  // void* stack at offset 0
  PushAddress(FuncInfo->getVarArgsStackIndex(), 0);

  // void* gr_top at offset 8 (4 on ILP32)
  const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
  PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);

  // void* vr_top at offset 16 (8 on ILP32)
  const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
  PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);

  // Helper function to store a 4-byte integer constant to VAList at offset
  // OffsetBytes, and increment OffsetBytes by 4.
  const auto PushIntConstant = [&](const int32_t Value) {
    constexpr int IntSize = 4;
    const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto MIB =
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
            .addDef(Temp)
            .addImm(Value);
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

    // NOTE(review): same memory-operand assumption as in PushAddress.
    const auto *MMO = *I.memoperands_begin();
    // Always a 32-bit store, so the immediate is scaled by IntSize regardless
    // of the pointer width.
    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
              .addUse(Temp)
              .addUse(VAList)
              .addImm(OffsetBytes / IntSize)
              .addMemOperand(MF.getMachineMemOperand(
                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
                  MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
    OffsetBytes += IntSize;
  };

  // The two offsets are stored negated: they count up towards zero from the
  // start of the respective save area, relative to its *_top pointer.

  // int gr_offs at offset 24 (12 on ILP32)
  PushIntConstant(-static_cast<int32_t>(GPRSize));

  // int vr_offs at offset 28 (16 on ILP32)
  PushIntConstant(-static_cast<int32_t>(FPRSize));

  // Three pointers plus two 4-byte ints must have been written.
  assert(OffsetBytes == (IsILP32 ? 20 : 32) && "Unexpected offset");

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/vararg.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=aarch64-unknown-linux -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK

--- |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need IR section, and there's too much going on here

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux"

%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }

; Variadic function backing the MIR test below: it allocates a
; %struct.__va_list on the stack, initialises it with llvm.va_start, and
; returns the i32 found 28 bytes into the object (named vr_offs here).
; The MIR memory operands refer to %ap and %vr_offs_p by name.
define i32 @va_start(ptr %a, ...) {
entry:
%ap = alloca %struct.__va_list, align 8
call void @llvm.lifetime.start.p0(i64 32, ptr %ap)
call void @llvm.va_start.p0(ptr %ap)
%vr_offs_p = getelementptr inbounds i8, ptr %ap, i64 28
%vr_offs = load i32, ptr %vr_offs_p, align 4
ret i32 %vr_offs
}
...
---
# Instruction selection of G_VASTART into explicit va_list field stores.
# The expected output is three 64-bit stores (STRXui, slots 0-2) followed by
# two 32-bit stores (STRWui, slots 6-7) through the address of %stack.2.ap.
# Every stored address is %stack.0 + 0 and both stored constants are 0 in the
# expected output; presumably the vararg save-area indices and sizes are not
# populated when starting from MIR -- confirm before relying on these values.
name: va_start
alignment: 16
legalized: true
regBankSelected: true
tracksRegLiveness: true
fixedStack:
- { id: 0, size: 4, alignment: 16 }
stack:
- { id: 0, size: 56, alignment: 8 }
- { id: 1, size: 128, alignment: 16 }
- { id: 2, name: ap, size: 32, alignment: 8 }
body: |
bb.0.entry:
; CHECK-LABEL: name: va_start
; CHECK: LIFETIME_START %stack.2.ap
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.2.ap, 0, 0
; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
; CHECK-NEXT: STRXui [[ADDXri1]], [[ADDXri]], 0 :: (store (s64) into %ir.ap)
; CHECK-NEXT: [[ADDXri2:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
; CHECK-NEXT: STRXui [[ADDXri2]], [[ADDXri]], 1 :: (store (s64) into %ir.ap + 8)
; CHECK-NEXT: [[ADDXri3:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
; CHECK-NEXT: STRXui [[ADDXri3]], [[ADDXri]], 2 :: (store (s64) into %ir.ap + 16)
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
; CHECK-NEXT: STRWui [[MOVi32imm]], [[ADDXri]], 6 :: (store (s32) into %ir.ap + 24, align 8)
; CHECK-NEXT: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 0
; CHECK-NEXT: STRWui [[MOVi32imm1]], [[ADDXri]], 7 :: (store (s32) into %ir.ap + 28, basealign 8)
; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui %stack.2.ap, 7 :: (dereferenceable load (s32) from %ir.vr_offs_p)
; CHECK-NEXT: $w0 = COPY [[LDRWui]]
LIFETIME_START %stack.2.ap
%0:gpr(p0) = G_FRAME_INDEX %stack.2.ap
G_VASTART %0(p0) :: (store (s256) into %ir.ap, align 8)
%1:gpr(s64) = G_CONSTANT i64 28
%2:gpr(p0) = G_PTR_ADD %0, %1(s64)
%3:gpr(s32) = G_LOAD %2(p0) :: (dereferenceable load (s32) from %ir.vr_offs_p)
$w0 = COPY %3(s32)
...
Loading