Skip to content

Commit a2088a2

Browse files
committed
[ARM] musttail fixes
Backend:
- Caller and callee arguments no longer have to match; they only need to take up the same space, as they can be changed before the call
- Allowed tail calls if caller and callee both (or neither) use sret, whereas before it would be disallowed if either used sret
- Allowed tail calls if byval args are used
- Added debug trace for IsEligibleForTailCallOptimization

Frontend (clang):
- Do not generate extra alloca if sret is used with musttail, as the space for the sret is allocated already

Change-Id: Ic7f246a7eca43c06874922d642d7dc44bdfc98ec
1 parent ca3b9af commit a2088a2

File tree

10 files changed

+661
-191
lines changed

10 files changed

+661
-191
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5086,7 +5086,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
50865086
RawAddress SRetAlloca = RawAddress::invalid();
50875087
llvm::Value *UnusedReturnSizePtr = nullptr;
50885088
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
5089-
if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
5089+
if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) {
50905090
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
50915091
IRFunctionArgs.getSRetArgNo(),
50925092
RetTy, CharUnits::fromQuantity(1));

llvm/include/llvm/CodeGen/CallingConvLower.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,8 @@ class CCState {
540540
});
541541
}
542542

543+
void dump() const;
544+
543545
private:
544546
/// MarkAllocated - Mark a register and all of its aliases as allocated.
545547
void MarkAllocated(MCPhysReg Reg);

llvm/lib/CodeGen/CallingConvLower.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,64 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
290290
return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(),
291291
RVLocs2.end(), AreCompatible);
292292
}
293+
294+
void CCState::dump() const {
295+
dbgs() << "CCState:\n";
296+
for (const CCValAssign &Loc : Locs) {
297+
if (Loc.isRegLoc()) {
298+
dbgs() << " Reg " << TRI.getName(Loc.getLocReg());
299+
} else if (Loc.isMemLoc()) {
300+
dbgs() << " Mem " << Loc.getLocMemOffset();
301+
} else {
302+
assert(Loc.isPendingLoc());
303+
dbgs() << " Pend " << Loc.getExtraInfo();
304+
}
305+
306+
dbgs() << " ValVT:" << Loc.getValVT();
307+
dbgs() << " LocVT:" << Loc.getLocVT();
308+
309+
if (Loc.needsCustom())
310+
dbgs() << " custom";
311+
312+
switch (Loc.getLocInfo()) {
313+
case CCValAssign::Full:
314+
dbgs() << " Full";
315+
break;
316+
case CCValAssign::SExt:
317+
dbgs() << " SExt";
318+
break;
319+
case CCValAssign::ZExt:
320+
dbgs() << " ZExt";
321+
break;
322+
case CCValAssign::AExt:
323+
dbgs() << " AExt";
324+
break;
325+
case CCValAssign::SExtUpper:
326+
dbgs() << " SExtUpper";
327+
break;
328+
case CCValAssign::ZExtUpper:
329+
dbgs() << " ZExtUpper";
330+
break;
331+
case CCValAssign::AExtUpper:
332+
dbgs() << " AExtUpper";
333+
break;
334+
case CCValAssign::BCvt:
335+
dbgs() << " BCvt";
336+
break;
337+
case CCValAssign::Trunc:
338+
dbgs() << " Trunc";
339+
break;
340+
case CCValAssign::VExt:
341+
dbgs() << " VExt";
342+
break;
343+
case CCValAssign::FPExt:
344+
dbgs() << " FPExt";
345+
break;
346+
case CCValAssign::Indirect:
347+
dbgs() << " Indirect";
348+
break;
349+
}
350+
351+
dbgs() << "\n";
352+
}
353+
}

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 42 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24072407
isTailCall = false;
24082408

24092409
// For both the non-secure calls and the returns from a CMSE entry function,
2410-
// the function needs to do some extra work afte r the call, or before the
2411-
// return, respectively, thus it cannot end with atail call
2410+
// the function needs to do some extra work after the call, or before the
2411+
// return, respectively, thus it cannot end with a tail call
24122412
if (isCmseNSCall || AFI->isCmseNSEntryFunction())
24132413
isTailCall = false;
24142414

@@ -2960,50 +2960,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
29602960
Size = std::max<int>(Size - Excess, 0);
29612961
}
29622962

2963-
/// MatchingStackOffset - Return true if the given stack call argument is
2964-
/// already available in the same position (relatively) of the caller's
2965-
/// incoming argument stack.
2966-
static
2967-
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2968-
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2969-
const TargetInstrInfo *TII) {
2970-
unsigned Bytes = Arg.getValueSizeInBits() / 8;
2971-
int FI = std::numeric_limits<int>::max();
2972-
if (Arg.getOpcode() == ISD::CopyFromReg) {
2973-
Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2974-
if (!VR.isVirtual())
2975-
return false;
2976-
MachineInstr *Def = MRI->getVRegDef(VR);
2977-
if (!Def)
2978-
return false;
2979-
if (!Flags.isByVal()) {
2980-
if (!TII->isLoadFromStackSlot(*Def, FI))
2981-
return false;
2982-
} else {
2983-
return false;
2984-
}
2985-
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2986-
if (Flags.isByVal())
2987-
// ByVal argument is passed in as a pointer but it's now being
2988-
// dereferenced. e.g.
2989-
// define @foo(%struct.X* %A) {
2990-
// tail call @bar(%struct.X* byval %A)
2991-
// }
2992-
return false;
2993-
SDValue Ptr = Ld->getBasePtr();
2994-
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2995-
if (!FINode)
2996-
return false;
2997-
FI = FINode->getIndex();
2998-
} else
2999-
return false;
3000-
3001-
assert(FI != std::numeric_limits<int>::max());
3002-
if (!MFI.isFixedObjectIndex(FI))
3003-
return false;
3004-
return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
3005-
}
3006-
30072963
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
30082964
/// for tail call optimization. Targets which want to do tail call
30092965
/// optimization should implement this function. Note that this function also
@@ -3045,8 +3001,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30453001
for (const CCValAssign &AL : ArgLocs)
30463002
if (AL.isRegLoc())
30473003
AddressRegisters.erase(AL.getLocReg());
3048-
if (AddressRegisters.empty())
3004+
if (AddressRegisters.empty()) {
3005+
LLVM_DEBUG(dbgs() << "false (no space for target address)\n");
30493006
return false;
3007+
}
30503008
}
30513009

30523010
// Look for obvious safe cases to perform tail call optimization that do not
@@ -3055,18 +3013,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30553013
// Exception-handling functions need a special set of instructions to indicate
30563014
// a return to the hardware. Tail-calling another function would probably
30573015
// break this.
3058-
if (CallerF.hasFnAttribute("interrupt"))
3016+
if (CallerF.hasFnAttribute("interrupt")) {
3017+
LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
30593018
return false;
3019+
}
30603020

3061-
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3021+
if (canGuaranteeTCO(CalleeCC,
3022+
getTargetMachine().Options.GuaranteedTailCallOpt)) {
3023+
LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
3024+
<< " (guaranteed tail-call CC)\n");
30623025
return CalleeCC == CallerCC;
3026+
}
30633027

3064-
// Also avoid sibcall optimization if either caller or callee uses struct
3065-
// return semantics.
3028+
// Also avoid sibcall optimization if only one of caller or callee uses
3029+
// struct return semantics.
30663030
bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
30673031
bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3068-
if (isCalleeStructRet || isCallerStructRet)
3032+
if (isCalleeStructRet != isCallerStructRet) {
3033+
LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
30693034
return false;
3035+
}
30703036

30713037
// Externally-defined functions with weak linkage should not be
30723038
// tail-called on ARM when the OS does not support dynamic
@@ -3079,8 +3045,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30793045
const GlobalValue *GV = G->getGlobal();
30803046
const Triple &TT = getTargetMachine().getTargetTriple();
30813047
if (GV->hasExternalWeakLinkage() &&
3082-
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3048+
(!TT.isOSWindows() || TT.isOSBinFormatELF() ||
3049+
TT.isOSBinFormatMachO())) {
3050+
LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
30833051
return false;
3052+
}
30843053
}
30853054

30863055
// Check that the call results are passed in the same way.
@@ -3089,70 +3058,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30893058
getEffectiveCallingConv(CalleeCC, isVarArg),
30903059
getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
30913060
CCAssignFnForReturn(CalleeCC, isVarArg),
3092-
CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3061+
CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
3062+
LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
30933063
return false;
3064+
}
30943065
// The callee has to preserve all registers the caller needs to preserve.
30953066
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
30963067
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
30973068
if (CalleeCC != CallerCC) {
30983069
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3099-
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3070+
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
3071+
LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
31003072
return false;
3073+
}
31013074
}
31023075

3103-
// If Caller's vararg or byval argument has been split between registers and
3076+
// If Caller's vararg argument has been split between registers and
31043077
// stack, do not perform tail call, since part of the argument is in caller's
31053078
// local frame.
31063079
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3107-
if (AFI_Caller->getArgRegsSaveSize())
3080+
if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
3081+
LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n");
31083082
return false;
3083+
}
31093084

31103085
// If the callee takes no arguments then go on to check the results of the
31113086
// call.
3112-
if (!Outs.empty()) {
3113-
if (CCInfo.getStackSize()) {
3114-
// Check if the arguments are already laid out in the right way as
3115-
// the caller's fixed stack objects.
3116-
MachineFrameInfo &MFI = MF.getFrameInfo();
3117-
const MachineRegisterInfo *MRI = &MF.getRegInfo();
3118-
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3119-
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3120-
i != e;
3121-
++i, ++realArgIdx) {
3122-
CCValAssign &VA = ArgLocs[i];
3123-
EVT RegVT = VA.getLocVT();
3124-
SDValue Arg = OutVals[realArgIdx];
3125-
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3126-
if (VA.getLocInfo() == CCValAssign::Indirect)
3127-
return false;
3128-
if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3129-
// f64 and vector types are split into multiple registers or
3130-
// register/stack-slot combinations. The types will not match
3131-
// the registers; give up on memory f64 refs until we figure
3132-
// out what to do about this.
3133-
if (!VA.isRegLoc())
3134-
return false;
3135-
if (!ArgLocs[++i].isRegLoc())
3136-
return false;
3137-
if (RegVT == MVT::v2f64) {
3138-
if (!ArgLocs[++i].isRegLoc())
3139-
return false;
3140-
if (!ArgLocs[++i].isRegLoc())
3141-
return false;
3142-
}
3143-
} else if (!VA.isRegLoc()) {
3144-
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3145-
MFI, MRI, TII))
3146-
return false;
3147-
}
3148-
}
3149-
}
3150-
3151-
const MachineRegisterInfo &MRI = MF.getRegInfo();
3152-
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3153-
return false;
3087+
const MachineRegisterInfo &MRI = MF.getRegInfo();
3088+
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
3089+
LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
3090+
return false;
31543091
}
31553092

3093+
// If the stack arguments for this call do not fit into our own save area then
3094+
// the call cannot be made tail.
3095+
if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
3096+
return false;
3097+
3098+
LLVM_DEBUG(dbgs() << "true\n");
31563099
return true;
31573100
}
31583101

llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,11 @@ define void @check227(
1212
; arg1 --> SP+188
1313

1414
entry:
15-
16-
;CHECK: sub sp, sp, #12
17-
;CHECK: push {r11, lr}
18-
;CHECK: sub sp, sp, #4
19-
;CHECK: add r0, sp, #12
20-
;CHECK: stm r0, {r1, r2, r3}
21-
;CHECK: ldr r0, [sp, #212]
22-
;CHECK: bl useInt
23-
;CHECK: add sp, sp, #4
24-
;CHECK: pop {r11, lr}
25-
;CHECK: add sp, sp, #12
15+
; CHECK: sub sp, sp, #12
16+
; CHECK: stm sp, {r1, r2, r3}
17+
; CHECK: ldr r0, [sp, #200]
18+
; CHECK: add sp, sp, #12
19+
; CHECK: b useInt
2620

2721
%0 = ptrtoint ptr %arg1 to i32
2822
tail call void @useInt(i32 %0)

llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,11 @@
77
define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0
88
ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. SP+8)
99
) {
10-
;CHECK: sub sp, sp, #16
11-
;CHECK: push {r11, lr}
12-
;CHECK: add r12, sp, #8
13-
;CHECK: stm r12, {r0, r1, r2, r3}
14-
;CHECK: add r0, sp, #12
15-
;CHECK: bl useInt
16-
;CHECK: pop {r11, lr}
17-
;CHECK: add sp, sp, #16
10+
;CHECK: sub sp, sp, #16
11+
;CHECK: stm sp, {r0, r1, r2, r3}
12+
;CHECK: add r0, sp, #4
13+
;CHECK: add sp, sp, #16
14+
;CHECK: b useInt
1815

1916
%1 = ptrtoint ptr %p1 to i32
2017
tail call void @useInt(i32 %1)

llvm/test/CodeGen/ARM/fp-arg-shuffle.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,28 @@
33
; CHECK: function1
44
; CHECK-NOT: vmov
55
define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp {
6+
; CHECK-LABEL: function1:
7+
; CHECK: @ %bb.0: @ %entry
8+
; CHECK-NEXT: .save {r4, r5, r11, lr}
9+
; CHECK-NEXT: push {r4, r5, r11, lr}
10+
; CHECK-NEXT: vldr d16, [sp, #40]
11+
; CHECK-NEXT: vldr d17, [sp, #32]
12+
; CHECK-NEXT: vmov r12, lr, d16
13+
; CHECK-NEXT: vldr d16, [sp, #16]
14+
; CHECK-NEXT: vmov r4, r5, d17
15+
; CHECK-NEXT: vldr d17, [sp, #24]
16+
; CHECK-NEXT: str r3, [sp, #36]
17+
; CHECK-NEXT: str r2, [sp, #32]
18+
; CHECK-NEXT: str r1, [sp, #44]
19+
; CHECK-NEXT: str r0, [sp, #40]
20+
; CHECK-NEXT: vstr d17, [sp, #16]
21+
; CHECK-NEXT: vstr d16, [sp, #24]
22+
; CHECK-NEXT: mov r0, r12
23+
; CHECK-NEXT: mov r1, lr
24+
; CHECK-NEXT: mov r2, r4
25+
; CHECK-NEXT: mov r3, r5
26+
; CHECK-NEXT: pop {r4, r5, r11, lr}
27+
; CHECK-NEXT: b function2
628
entry:
729
%call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind
830
ret double %call

0 commit comments

Comments
 (0)