Skip to content

Commit d88f96d

Browse files
committed
ARM: support mandatory tail calls for tailcc & swifttailcc
This adds support for callee-pop conventions to the ARM backend so that it can ensure a call marked "tail" is actually a tail call.
1 parent 66ff1cb commit d88f96d

17 files changed

+877
-75
lines changed

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
7979
return CSR_NoRegs_SaveList;
8080
} else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
8181
return CSR_Win_AAPCS_CFGuard_Check_SaveList;
82+
} else if (F.getCallingConv() == CallingConv::SwiftTail) {
83+
return STI.isTargetDarwin()
84+
? CSR_iOS_SwiftTail_SaveList
85+
: (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList
86+
: CSR_AAPCS_SwiftTail_SaveList);
8287
} else if (F.hasFnAttribute("interrupt")) {
8388
if (STI.isMClass()) {
8489
// M-class CPUs have hardware which saves the registers needed to allow a
@@ -129,6 +134,10 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
129134
return CSR_NoRegs_RegMask;
130135
if (CC == CallingConv::CFGuard_Check)
131136
return CSR_Win_AAPCS_CFGuard_Check_RegMask;
137+
if (CC == CallingConv::SwiftTail) {
138+
return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_RegMask
139+
: CSR_AAPCS_SwiftTail_RegMask;
140+
}
132141
if (STI.getTargetLowering()->supportSwiftError() &&
133142
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
134143
return STI.isTargetDarwin() ? CSR_iOS_SwiftError_RegMask

llvm/lib/Target/ARM/ARMCallingConv.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,9 @@ def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
278278
// R8 is used to pass swifterror, remove it from CSR.
279279
def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
280280

281+
// R10 is used to pass swiftself, remove it from CSR.
282+
def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
283+
281284
// The order of callee-saved registers needs to match the order we actually push
282285
// them in FrameLowering, because this order is what's used by
283286
// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
@@ -290,6 +293,10 @@ def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
290293
def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
291294
R8)>;
292295

296+
// R10 is used to pass swiftself, remove it from CSR.
297+
def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
298+
R10)>;
299+
293300
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
294301
// and the pointer return value are both passed in R0 in these cases, this can
295302
// be partially modelled by treating R0 as a callee-saved register
@@ -305,6 +312,9 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
305312
// R8 is used to pass swifterror, remove it from CSR.
306313
def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
307314

315+
// R10 is used to pass swiftself, remove it from CSR.
316+
def CSR_iOS_SwiftTail : CalleeSavedRegs<(sub CSR_iOS, R10)>;
317+
308318
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
309319
(sub CSR_AAPCS_ThisReturn, R9))>;
310320

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2037,7 +2037,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
20372037
}
20382038

20392039
auto NewMI = std::prev(MBBI);
2040-
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
2040+
for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
20412041
NewMI->addOperand(MBBI->getOperand(i));
20422042

20432043

llvm/lib/Target/ARM/ARMFastISel.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,6 +1849,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
18491849
}
18501850
case CallingConv::ARM_AAPCS_VFP:
18511851
case CallingConv::Swift:
1852+
case CallingConv::SwiftTail:
18521853
if (!isVarArg)
18531854
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
18541855
// Fall through to soft float variant, variadic functions don't
@@ -3014,6 +3015,7 @@ bool ARMFastISel::fastLowerArguments() {
30143015
case CallingConv::ARM_AAPCS:
30153016
case CallingConv::ARM_APCS:
30163017
case CallingConv::Swift:
3018+
case CallingConv::SwiftTail:
30173019
break;
30183020
}
30193021

llvm/lib/Target/ARM/ARMFrameLowering.cpp

Lines changed: 77 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,41 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
237237
return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
238238
}
239239

240+
// Returns how much of the incoming argument stack area we should clean up in an
241+
// epilogue. For the C calling convention this will be 0, for guaranteed tail
242+
// call conventions it can be positive (a normal return or a tail call to a
243+
// function that uses less stack space for arguments) or negative (for a tail
244+
// call to a function that needs more stack space than us for arguments).
245+
static int getArgumentStackToRestore(MachineFunction &MF,
246+
MachineBasicBlock &MBB) {
247+
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
248+
bool IsTailCallReturn = false;
249+
if (MBB.end() != MBBI) {
250+
unsigned RetOpcode = MBBI->getOpcode();
251+
IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
252+
RetOpcode == ARM::TCRETURNri;
253+
}
254+
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
255+
256+
int ArgumentPopSize = 0;
257+
if (IsTailCallReturn) {
258+
MachineOperand &StackAdjust = MBBI->getOperand(1);
259+
260+
// For a tail-call in a callee-pops-arguments environment, some or all of
261+
// the stack may actually be in use for the call's arguments, this is
262+
// calculated during LowerCall and consumed here...
263+
ArgumentPopSize = StackAdjust.getImm();
264+
} else {
265+
// ... otherwise the amount to pop is *all* of the argument space,
266+
// conveniently stored in the MachineFunctionInfo by
267+
// LowerFormalArguments. This will, of course, be zero for the C calling
268+
// convention.
269+
ArgumentPopSize = AFI->getArgumentStackToRestore();
270+
}
271+
272+
return ArgumentPopSize;
273+
}
274+
240275
static void emitRegPlusImmediate(
241276
bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
242277
const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
@@ -868,7 +903,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
868903
"This emitEpilogue does not support Thumb1!");
869904
bool isARM = !AFI->isThumbFunction();
870905

871-
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
906+
// Amount of stack space we reserved next to incoming args for either
907+
// varargs registers or stack arguments in tail calls made by this function.
908+
unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
909+
910+
// How much of the stack used by incoming arguments this function is expected
911+
// to restore in this particular epilogue.
912+
int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
872913
int NumBytes = (int)MFI.getStackSize();
873914
Register FramePtr = RegInfo->getFrameRegister(MF);
874915

@@ -882,8 +923,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
882923
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
883924

884925
if (!AFI->hasStackFrame()) {
885-
if (NumBytes - ArgRegsSaveSize != 0)
886-
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize,
926+
if (NumBytes - ReservedArgStack != 0)
927+
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ReservedArgStack,
887928
MachineInstr::FrameDestroy);
888929
} else {
889930
// Unwind MBBI to point to first LDR / VLDRD.
@@ -897,7 +938,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
897938
}
898939

899940
// Move SP to start of FP callee save spill area.
900-
NumBytes -= (ArgRegsSaveSize +
941+
NumBytes -= (ReservedArgStack +
901942
AFI->getFPCXTSaveAreaSize() +
902943
AFI->getGPRCalleeSavedArea1Size() +
903944
AFI->getGPRCalleeSavedArea2Size() +
@@ -969,9 +1010,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
9691010
if (AFI->getFPCXTSaveAreaSize()) MBBI++;
9701011
}
9711012

972-
if (ArgRegsSaveSize)
973-
emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize,
1013+
if (ReservedArgStack || IncomingArgStackToRestore) {
1014+
assert(ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1015+
"attempting to restore negative stack amount");
1016+
emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1017+
ReservedArgStack + IncomingArgStackToRestore,
9741018
MachineInstr::FrameDestroy);
1019+
}
9751020
}
9761021

9771022
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -2288,31 +2333,37 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
22882333
MachineBasicBlock::iterator I) const {
22892334
const ARMBaseInstrInfo &TII =
22902335
*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2336+
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2337+
bool isARM = !AFI->isThumbFunction();
2338+
DebugLoc dl = I->getDebugLoc();
2339+
unsigned Opc = I->getOpcode();
2340+
bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
2341+
unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2342+
2343+
assert(!AFI->isThumb1OnlyFunction() &&
2344+
"This eliminateCallFramePseudoInstr does not support Thumb1!");
2345+
2346+
int PIdx = I->findFirstPredOperandIdx();
2347+
ARMCC::CondCodes Pred = (PIdx == -1)
2348+
? ARMCC::AL
2349+
: (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2350+
unsigned PredReg = TII.getFramePred(*I);
2351+
22912352
if (!hasReservedCallFrame(MF)) {
2353+
// Bail early if the callee is expected to do the adjustment.
2354+
if (IsDestroy && CalleePopAmount != -1U)
2355+
return MBB.erase(I);
2356+
22922357
// If we have alloca, convert as follows:
22932358
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
22942359
// ADJCALLSTACKUP -> add, sp, sp, amount
2295-
MachineInstr &Old = *I;
2296-
DebugLoc dl = Old.getDebugLoc();
2297-
unsigned Amount = TII.getFrameSize(Old);
2360+
unsigned Amount = TII.getFrameSize(*I);
22982361
if (Amount != 0) {
22992362
// We need to keep the stack aligned properly. To do this, we round the
23002363
// amount of space needed for the outgoing arguments up to the next
23012364
// alignment boundary.
23022365
Amount = alignSPAdjust(Amount);
23032366

2304-
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2305-
assert(!AFI->isThumb1OnlyFunction() &&
2306-
"This eliminateCallFramePseudoInstr does not support Thumb1!");
2307-
bool isARM = !AFI->isThumbFunction();
2308-
2309-
// Replace the pseudo instruction with a new instruction...
2310-
unsigned Opc = Old.getOpcode();
2311-
int PIdx = Old.findFirstPredOperandIdx();
2312-
ARMCC::CondCodes Pred =
2313-
(PIdx == -1) ? ARMCC::AL
2314-
: (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
2315-
unsigned PredReg = TII.getFramePred(Old);
23162367
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
23172368
emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
23182369
Pred, PredReg);
@@ -2322,6 +2373,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
23222373
Pred, PredReg);
23232374
}
23242375
}
2376+
} else if (CalleePopAmount != -1U) {
2377+
// If the calling convention demands that the callee pops arguments from the
2378+
// stack, we want to add it back if we have a reserved call frame.
2379+
emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2380+
MachineInstr::NoFlags, Pred, PredReg);
23252381
}
23262382
return MBB.erase(I);
23272383
}

0 commit comments

Comments
 (0)