Skip to content

Commit 246baeb

Browse files
committed
[ARM] Add debug trace for tail-call optimisation
There are lots of reasons a call might not be eligible for tail-call optimisation; this adds debug trace to help understand the compiler's decisions here.
1 parent 8e289e4 commit 246baeb

File tree

1 file changed

+49
-15
lines changed

1 file changed

+49
-15
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3047,8 +3047,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30473047
for (const CCValAssign &AL : ArgLocs)
30483048
if (AL.isRegLoc())
30493049
AddressRegisters.erase(AL.getLocReg());
3050-
if (AddressRegisters.empty())
3050+
if (AddressRegisters.empty()) {
3051+
LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");
30513052
return false;
3053+
}
30523054
}
30533055

30543056
// Look for obvious safe cases to perform tail call optimization that do not
@@ -3057,18 +3059,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30573059
// Exception-handling functions need a special set of instructions to indicate
30583060
// a return to the hardware. Tail-calling another function would probably
30593061
// break this.
3060-
if (CallerF.hasFnAttribute("interrupt"))
3062+
if (CallerF.hasFnAttribute("interrupt")) {
3063+
LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
30613064
return false;
3065+
}
30623066

3063-
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3067+
if (canGuaranteeTCO(CalleeCC,
3068+
getTargetMachine().Options.GuaranteedTailCallOpt)) {
3069+
LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
3070+
<< " (guaranteed tail-call CC)\n");
30643071
return CalleeCC == CallerCC;
3072+
}
30653073

30663074
// Also avoid sibcall optimization if either caller or callee uses struct
30673075
// return semantics.
30683076
bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
30693077
bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3070-
if (isCalleeStructRet || isCallerStructRet)
3078+
if (isCalleeStructRet || isCallerStructRet) {
3079+
LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
30713080
return false;
3081+
}
30723082

30733083
// Externally-defined functions with weak linkage should not be
30743084
// tail-called on ARM when the OS does not support dynamic
@@ -3081,8 +3091,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30813091
const GlobalValue *GV = G->getGlobal();
30823092
const Triple &TT = getTargetMachine().getTargetTriple();
30833093
if (GV->hasExternalWeakLinkage() &&
3084-
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3094+
(!TT.isOSWindows() || TT.isOSBinFormatELF() ||
3095+
TT.isOSBinFormatMachO())) {
3096+
LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
30853097
return false;
3098+
}
30863099
}
30873100

30883101
// Check that the call results are passed in the same way.
@@ -3091,23 +3104,29 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30913104
getEffectiveCallingConv(CalleeCC, isVarArg),
30923105
getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
30933106
CCAssignFnForReturn(CalleeCC, isVarArg),
3094-
CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3107+
CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
3108+
LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
30953109
return false;
3110+
}
30963111
// The callee has to preserve all registers the caller needs to preserve.
30973112
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
30983113
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
30993114
if (CalleeCC != CallerCC) {
31003115
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3101-
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3116+
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
3117+
LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
31023118
return false;
3119+
}
31033120
}
31043121

31053122
// If Caller's vararg or byval argument has been split between registers and
31063123
// stack, do not perform tail call, since part of the argument is in caller's
31073124
// local frame.
31083125
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3109-
if (AFI_Caller->getArgRegsSaveSize())
3126+
if (AFI_Caller->getArgRegsSaveSize()) {
3127+
LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");
31103128
return false;
3129+
}
31113130

31123131
// If the callee takes no arguments then go on to check the results of the
31133132
// call.
@@ -3125,36 +3144,51 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
31253144
EVT RegVT = VA.getLocVT();
31263145
SDValue Arg = OutVals[realArgIdx];
31273146
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3128-
if (VA.getLocInfo() == CCValAssign::Indirect)
3147+
if (VA.getLocInfo() == CCValAssign::Indirect) {
3148+
LLVM_DEBUG(dbgs() << "false (indirect arg)\n");
31293149
return false;
3150+
}
31303151
if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
31313152
// f64 and vector types are split into multiple registers or
31323153
// register/stack-slot combinations. The types will not match
31333154
// the registers; give up on memory f64 refs until we figure
31343155
// out what to do about this.
3135-
if (!VA.isRegLoc())
3156+
if (!VA.isRegLoc()) {
3157+
LLVM_DEBUG(dbgs() << "false (f64 not in register)\n");
31363158
return false;
3137-
if (!ArgLocs[++i].isRegLoc())
3159+
}
3160+
if (!ArgLocs[++i].isRegLoc()) {
3161+
LLVM_DEBUG(dbgs() << "false (f64 not in register, second half)\n");
31383162
return false;
3163+
}
31393164
if (RegVT == MVT::v2f64) {
3140-
if (!ArgLocs[++i].isRegLoc())
3165+
if (!ArgLocs[++i].isRegLoc()) {
3166+
LLVM_DEBUG(dbgs() << "false (v2f64 not in register)\n");
31413167
return false;
3142-
if (!ArgLocs[++i].isRegLoc())
3168+
}
3169+
if (!ArgLocs[++i].isRegLoc()) {
3170+
LLVM_DEBUG(dbgs() << "false (v2f64 not in register, second half)\n");
31433171
return false;
3172+
}
31443173
}
31453174
} else if (!VA.isRegLoc()) {
31463175
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3147-
MFI, MRI, TII))
3176+
MFI, MRI, TII)) {
3177+
LLVM_DEBUG(dbgs() << "false (non-matching stack offset)\n");
31483178
return false;
3179+
}
31493180
}
31503181
}
31513182
}
31523183

31533184
const MachineRegisterInfo &MRI = MF.getRegInfo();
3154-
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3185+
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
3186+
LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
31553187
return false;
3188+
}
31563189
}
31573190

3191+
LLVM_DEBUG(dbgs() << "true\n");
31583192
return true;
31593193
}
31603194

0 commit comments

Comments
 (0)