@@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     isTailCall = false;

   // For both the non-secure calls and the returns from a CMSE entry function,
-  // the function needs to do some extra work after the call, or before the
-  // return, respectively, thus it cannot end with a tail call
+  // the function needs to do some extra work after the call, or before the
+  // return, respectively, thus it cannot end with a tail call
   if (isCmseNSCall || AFI->isCmseNSEntryFunction())
     isTailCall = false;

@@ -2960,6 +2960,50 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
     Size = std::max<int>(Size - Excess, 0);
 }

+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+                         MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
+                         const TargetInstrInfo *TII) {
+  unsigned Bytes = Arg.getValueSizeInBits() / 8;
+  int FI = std::numeric_limits<int>::max();
+  if (Arg.getOpcode() == ISD::CopyFromReg) {
+    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+    if (!VR.isVirtual())
+      return false;
+    MachineInstr *Def = MRI->getVRegDef(VR);
+    if (!Def)
+      return false;
+    if (!Flags.isByVal()) {
+      if (!TII->isLoadFromStackSlot(*Def, FI))
+        return false;
+    } else {
+      return false;
+    }
+  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+    if (Flags.isByVal())
+      // ByVal argument is passed in as a pointer but it's now being
+      // dereferenced. e.g.
+      // define @foo(%struct.X* %A) {
+      //   tail call @bar(%struct.X* byval %A)
+      // }
+      return false;
+    SDValue Ptr = Ld->getBasePtr();
+    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+    if (!FINode)
+      return false;
+    FI = FINode->getIndex();
+  } else
+    return false;
+
+  assert(FI != std::numeric_limits<int>::max());
+  if (!MFI.isFixedObjectIndex(FI))
+    return false;
+  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
+}
+
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization. Targets which want to do tail call
 /// optimization should implement this function. Note that this function also
@@ -3001,10 +3045,8 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
     for (const CCValAssign &AL : ArgLocs)
       if (AL.isRegLoc())
        AddressRegisters.erase(AL.getLocReg());
-    if (AddressRegisters.empty()) {
-      LLVM_DEBUG(dbgs() << "false (no space for target address)\n");
+    if (AddressRegisters.empty())
       return false;
-    }
   }

   // Look for obvious safe cases to perform tail call optimization that do not
@@ -3013,26 +3055,18 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
   // Exception-handling functions need a special set of instructions to indicate
   // a return to the hardware. Tail-calling another function would probably
   // break this.
-  if (CallerF.hasFnAttribute("interrupt")) {
-    LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
+  if (CallerF.hasFnAttribute("interrupt"))
     return false;
-  }

-  if (canGuaranteeTCO(CalleeCC,
-                      getTargetMachine().Options.GuaranteedTailCallOpt)) {
-    LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
-                      << " (guaranteed tail-call CC)\n");
+  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
     return CalleeCC == CallerCC;
-  }

-  // Also avoid sibcall optimization if only one of caller or callee uses
-  // struct return semantics.
+  // Also avoid sibcall optimization if either caller or callee uses struct
+  // return semantics.
   bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
   bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
-  if (isCalleeStructRet != isCallerStructRet) {
-    LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
+  if (isCalleeStructRet || isCallerStructRet)
     return false;
-  }

   // Externally-defined functions with weak linkage should not be
   // tail-called on ARM when the OS does not support dynamic
@@ -3045,11 +3079,8 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
     const GlobalValue *GV = G->getGlobal();
     const Triple &TT = getTargetMachine().getTargetTriple();
     if (GV->hasExternalWeakLinkage() &&
-        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
-         TT.isOSBinFormatMachO())) {
-      LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
       return false;
-    }
   }

   // Check that the call results are passed in the same way.
@@ -3058,44 +3089,70 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
           getEffectiveCallingConv(CalleeCC, isVarArg),
           getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
           CCAssignFnForReturn(CalleeCC, isVarArg),
-          CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
-    LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
+          CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
     return false;
-  }
   // The callee has to preserve all registers the caller needs to preserve.
   const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
   if (CalleeCC != CallerCC) {
     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
-    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
-      LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
+    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
       return false;
-    }
   }

-  // If Caller's vararg argument has been split between registers and
+  // If Caller's vararg or byval argument has been split between registers and
   // stack, do not perform tail call, since part of the argument is in caller's
   // local frame.
   const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
-  if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
-    LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n");
+  if (AFI_Caller->getArgRegsSaveSize())
     return false;
-  }

   // If the callee takes no arguments then go on to check the results of the
   // call.
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
-    LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
-    return false;
-  }
+  if (!Outs.empty()) {
+    if (CCInfo.getStackSize()) {
+      // Check if the arguments are already laid out in the right way as
+      // the caller's fixed stack objects.
+      MachineFrameInfo &MFI = MF.getFrameInfo();
+      const MachineRegisterInfo *MRI = &MF.getRegInfo();
+      const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+           i != e;
+           ++i, ++realArgIdx) {
+        CCValAssign &VA = ArgLocs[i];
+        EVT RegVT = VA.getLocVT();
+        SDValue Arg = OutVals[realArgIdx];
+        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+        if (VA.getLocInfo() == CCValAssign::Indirect)
+          return false;
+        if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
+          // f64 and vector types are split into multiple registers or
+          // register/stack-slot combinations. The types will not match
+          // the registers; give up on memory f64 refs until we figure
+          // out what to do about this.
+          if (!VA.isRegLoc())
+            return false;
+          if (!ArgLocs[++i].isRegLoc())
+            return false;
+          if (RegVT == MVT::v2f64) {
+            if (!ArgLocs[++i].isRegLoc())
+              return false;
+            if (!ArgLocs[++i].isRegLoc())
+              return false;
+          }
+        } else if (!VA.isRegLoc()) {
+          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+                                   MFI, MRI, TII))
+            return false;
+        }
+      }
+    }

-  // If the stack arguments for this call do not fit into our own save area then
-  // the call cannot be made tail.
-  if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
-    return false;
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
+    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
+      return false;
+  }

-  LLVM_DEBUG(dbgs() << "true\n");
   return true;
 }

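For orientation only (not part of the commit itself): under the ARM AAPCS the first four i32 arguments are passed in r0-r3 and the fifth goes on the stack, so a caller that simply forwards its own fifth incoming argument places it at the same fixed stack offset the outgoing call expects. That is the kind of call the MatchingStackOffset check added here is meant to recognise. A minimal, hypothetical LLVM IR sketch (@caller and @callee are made-up names):

; Illustrative example; not taken from any test in this change.
declare i32 @callee(i32, i32, i32, i32, i32)

define i32 @caller(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
  ; %e already lives in the caller's incoming argument area at the offset
  ; and size the outgoing call needs, so the stack-offset check can succeed
  ; and the call may stay a tail call.
  %r = tail call i32 @callee(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
  ret i32 %r
}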