Skip to content

Commit 77af9d1

Browse files
Him188 and madhur13490 authored
[AArch64][GlobalISel] Implement selectVaStartAAPCS (#106979)
This commit adds the missing support for varargs in the GlobalISel instruction selection pass for AAPCS; previously this was implemented only for Darwin. The implementation follows the AAPCS and SelectionDAG's LowerAAPCS_VASTART. It resolves all VA_START fallbacks in RAJAperf, llvm-test-suite, and SPEC CPU2017: these benchmarks now compile and pass without falling back because of varargs. --------- Co-authored-by: Madhur Amilkanthwar <[email protected]>
1 parent 90330e9 commit 77af9d1

File tree

3 files changed

+540
-1
lines changed

3 files changed

+540
-1
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1994,7 +1994,106 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
19941994

19951995
// Selects the va_start instruction I for the AAPCS va_list layout.
//
// Returns false without emitting anything when STI reports a Win64 calling
// convention for the function. Otherwise it writes all five va_list fields
// (three pointers followed by two 32-bit ints) through the pointer held in
// operand 0 of I, erases I, and returns true.
bool AArch64InstructionSelector::selectVaStartAAPCS(
19961996
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1997-
return false;
1997+
1998+
if (STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
1999+
MF.getFunction().isVarArg()))
2000+
return false;
2001+
2002+
// The layout of the va_list struct is specified in the AArch64 Procedure Call
2003+
// Standard, section 10.1.5.
2004+
2005+
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2006+
const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
2007+
const auto *PtrRegClass =
2008+
STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
2009+
2010+
// Pointer-sized ADD and store opcodes: the 32-bit (W) forms on ILP32, the
// 64-bit (X) forms otherwise, matching PtrSize and PtrRegClass above.
const MCInstrDesc &MCIDAddAddr =
2011+
TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
2012+
const MCInstrDesc &MCIDStoreAddr =
2013+
TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
2014+
2015+
/*
2016+
* typedef struct va_list {
2017+
* void * stack; // next stack param
2018+
* void * gr_top; // end of GP arg reg save area
2019+
* void * vr_top; // end of FP/SIMD arg reg save area
2020+
* int gr_offs; // offset from gr_top to next GP register arg
2021+
* int vr_offs; // offset from vr_top to next FP/SIMD register arg
2022+
* } va_list;
2023+
*/
2024+
const auto VAList = I.getOperand(0).getReg();
2025+
2026+
// Our current offset in bytes from the va_list struct (VAList).
2027+
unsigned OffsetBytes = 0;
2028+
2029+
// Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
2030+
// and increment OffsetBytes by PtrSize.
2031+
const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
2032+
const Register Top = MRI.createVirtualRegister(PtrRegClass);
2033+
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
2034+
.addDef(Top)
2035+
.addFrameIndex(FrameIndex)
2036+
.addImm(Imm)
2037+
.addImm(0);
2038+
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2039+
2040+
// Build the store's memory operand from I's first memory operand, reusing
// its pointer info (shifted by OffsetBytes) and base alignment. This
// dereferences memoperands_begin() unchecked, so I is assumed to carry at
// least one memory operand.
const auto *MMO = *I.memoperands_begin();
2041+
MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
2042+
.addUse(Top)
2043+
.addUse(VAList)
2044+
// The store immediate is expressed in units of the access size (PtrSize),
// while the memory operand below takes the offset in bytes.
.addImm(OffsetBytes / PtrSize)
2045+
.addMemOperand(MF.getMachineMemOperand(
2046+
MMO->getPointerInfo().getWithOffset(OffsetBytes),
2047+
MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
2048+
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2049+
2050+
OffsetBytes += PtrSize;
2051+
};
2052+
2053+
// void* stack at offset 0
2054+
PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2055+
2056+
// void* gr_top at offset 8 (4 on ILP32)
2057+
const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2058+
PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2059+
2060+
// void* vr_top at offset 16 (8 on ILP32)
2061+
const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2062+
PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2063+
2064+
// Helper function to store a 4-byte integer constant to VAList at offset
2065+
// OffsetBytes, and increment OffsetBytes by 4.
2066+
const auto PushIntConstant = [&](const int32_t Value) {
2067+
constexpr int IntSize = 4;
2068+
const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2069+
auto MIB =
2070+
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2071+
.addDef(Temp)
2072+
.addImm(Value);
2073+
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2074+
2075+
const auto *MMO = *I.memoperands_begin();
2076+
MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2077+
.addUse(Temp)
2078+
.addUse(VAList)
2079+
// As in PushAddress, the immediate is in units of the access size (4 bytes
// here); the memory operand below takes the offset in bytes.
.addImm(OffsetBytes / IntSize)
2080+
.addMemOperand(MF.getMachineMemOperand(
2081+
MMO->getPointerInfo().getWithOffset(OffsetBytes),
2082+
MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2083+
constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2084+
OffsetBytes += IntSize;
2085+
};
2086+
2087+
// int gr_offs at offset 24 (12 on ILP32)
// Stored negated: gr_top was set to the GPR frame index plus GPRSize, and
// gr_offs is an offset from gr_top.
2088+
PushIntConstant(-static_cast<int32_t>(GPRSize));
2089+
2090+
// int vr_offs at offset 28 (16 on ILP32)
// Stored negated for the same reason, relative to vr_top.
2091+
PushIntConstant(-static_cast<int32_t>(FPRSize));
2092+
2093+
// Sanity check: three pointers plus two 4-byte ints is 3*8 + 2*4 = 32 bytes,
// or 3*4 + 2*4 = 20 bytes on ILP32.
assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2094+
2095+
I.eraseFromParent();
2096+
return true;
19982097
}
19992098

20002099
bool AArch64InstructionSelector::selectVaStartDarwin(
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -O0 -mtriple=aarch64-unknown-linux -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK
3+
4+
# Embedded LLVM IR module. @va_start allocates a local %struct.__va_list
# ({ ptr, ptr, ptr, i32, i32 }), calls llvm.va_start on it, then loads and
# returns the i32 at byte offset 28 (named %vr_offs in the IR).
--- |
5+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
6+
target triple = "aarch64-unknown-linux"
7+
8+
%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
9+
10+
define i32 @va_start(ptr %a, ...) {
11+
entry:
12+
%ap = alloca %struct.__va_list, align 8
13+
call void @llvm.lifetime.start.p0(i64 32, ptr %ap)
14+
call void @llvm.va_start.p0(ptr %ap)
15+
%vr_offs_p = getelementptr inbounds i8, ptr %ap, i64 28
16+
%vr_offs = load i32, ptr %vr_offs_p, align 4
17+
ret i32 %vr_offs
18+
}
19+
...
20+
---
# Checks that instruction selection expands G_VASTART into five stores through
# the %stack.2.ap address: three 64-bit stores (STRXui, immediates 0..2, byte
# offsets 0/8/16) followed by two 32-bit stores (STRWui, immediates 6 and 7,
# byte offsets 24/28).
# NOTE(review): every stored address is %stack.0 + 0 and both stored ints are 0,
# which suggests the varargs frame indices and save-area sizes are at their
# default values in this test, so it appears to pin the store pattern rather
# than realistic save-area values. Confirm.
21+
name: va_start
22+
alignment: 16
23+
legalized: true
24+
regBankSelected: true
25+
tracksRegLiveness: true
26+
fixedStack:
27+
- { id: 0, size: 4, alignment: 16 }
28+
stack:
29+
- { id: 0, size: 56, alignment: 8 }
30+
- { id: 1, size: 128, alignment: 16 }
31+
- { id: 2, name: ap, size: 32, alignment: 8 }
32+
body: |
33+
bb.0.entry:
34+
; CHECK-LABEL: name: va_start
35+
; CHECK: LIFETIME_START %stack.2.ap
36+
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.2.ap, 0, 0
37+
; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
38+
; CHECK-NEXT: STRXui [[ADDXri1]], [[ADDXri]], 0 :: (store (s64) into %ir.ap)
39+
; CHECK-NEXT: [[ADDXri2:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
40+
; CHECK-NEXT: STRXui [[ADDXri2]], [[ADDXri]], 1 :: (store (s64) into %ir.ap + 8)
41+
; CHECK-NEXT: [[ADDXri3:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
42+
; CHECK-NEXT: STRXui [[ADDXri3]], [[ADDXri]], 2 :: (store (s64) into %ir.ap + 16)
43+
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
44+
; CHECK-NEXT: STRWui [[MOVi32imm]], [[ADDXri]], 6 :: (store (s32) into %ir.ap + 24, align 8)
45+
; CHECK-NEXT: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 0
46+
; CHECK-NEXT: STRWui [[MOVi32imm1]], [[ADDXri]], 7 :: (store (s32) into %ir.ap + 28, basealign 8)
47+
; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui %stack.2.ap, 7 :: (dereferenceable load (s32) from %ir.vr_offs_p)
48+
; CHECK-NEXT: $w0 = COPY [[LDRWui]]
49+
LIFETIME_START %stack.2.ap
50+
%0:gpr(p0) = G_FRAME_INDEX %stack.2.ap
51+
G_VASTART %0(p0) :: (store (s256) into %ir.ap, align 8)
52+
%1:gpr(s64) = G_CONSTANT i64 28
53+
%2:gpr(p0) = G_PTR_ADD %0, %1(s64)
54+
%3:gpr(s32) = G_LOAD %2(p0) :: (dereferenceable load (s32) from %ir.vr_offs_p)
55+
$w0 = COPY %3(s32)
56+
...

0 commit comments

Comments
 (0)