Skip to content

[ARM] musttail fixes #102896

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 27 additions & 91 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2961,50 +2961,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
Size = std::max<int>(Size - Excess, 0);
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!VR.isVirtual())
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else
return false;

assert(FI != std::numeric_limits<int>::max());
if (!MFI.isFixedObjectIndex(FI))
return false;
return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function. Note that this function also
Expand Down Expand Up @@ -3046,8 +3002,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
for (const CCValAssign &AL : ArgLocs)
if (AL.isRegLoc())
AddressRegisters.erase(AL.getLocReg());
if (AddressRegisters.empty())
if (AddressRegisters.empty()) {
LLVM_DEBUG(dbgs() << "false (no space for target address)\n");
return false;
}
}

// Look for obvious safe cases to perform tail call optimization that do not
Expand All @@ -3056,18 +3014,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
if (CallerF.hasFnAttribute("interrupt"))
if (CallerF.hasFnAttribute("interrupt")) {
LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
return false;
}

if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
if (canGuaranteeTCO(CalleeCC,
getTargetMachine().Options.GuaranteedTailCallOpt)) {
LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
<< " (guaranteed tail-call CC)\n");
return CalleeCC == CallerCC;
}

// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
if (isCalleeStructRet || isCallerStructRet)
if (isCalleeStructRet != isCallerStructRet) {
LLVM_DEBUG(dbgs() << "false (mismatched sret)\n");
return false;
}

// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
Expand All @@ -3080,8 +3046,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
(!TT.isOSWindows() || TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO())) {
LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
return false;
}
}

// Check that the call results are passed in the same way.
Expand Down Expand Up @@ -3110,50 +3079,17 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(

// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
if (CCInfo.getStackSize()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
// f64 and vector types are split into multiple registers or
// register/stack-slot combinations. The types will not match
// the registers; give up on memory f64 refs until we figure
// out what to do about this.
if (!VA.isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
if (RegVT == MVT::v2f64) {
if (!ArgLocs[++i].isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
}
} else if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII))
return false;
}
}
}

const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
return false;
}

// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
return false;

return true;
}

Expand Down
23 changes: 22 additions & 1 deletion llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,30 @@
; RUN: llc -mtriple=arm-eabi -mattr=+neon -float-abi=soft %s -o - | FileCheck %s

; CHECK: function1
; CHECK-NOT: vmov
define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp {
entry:
; CHECK-LABEL: function1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: vldr d16, [sp, #40]
; CHECK-NEXT: vldr d17, [sp, #32]
; CHECK-NEXT: vmov r12, lr, d16
; CHECK-NEXT: vldr d16, [sp, #16]
; CHECK-NEXT: vmov r4, r5, d17
; CHECK-NEXT: vldr d17, [sp, #24]
; CHECK-NEXT: str r3, [sp, #36]
; CHECK-NEXT: str r2, [sp, #32]
; CHECK-NEXT: str r1, [sp, #44]
; CHECK-NEXT: str r0, [sp, #40]
; CHECK-NEXT: vstr d17, [sp, #16]
; CHECK-NEXT: vstr d16, [sp, #24]
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: mov r2, r4
; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: b function2
%call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind
ret double %call
}
Expand Down
41 changes: 14 additions & 27 deletions llvm/test/CodeGen/ARM/fp16-vector-argument.ll
Original file line number Diff line number Diff line change
Expand Up @@ -145,26 +145,21 @@ entry:
define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) {
; SOFT-LABEL: many_args_test:
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: sub sp, sp, #32
; SOFT-NEXT: add r12, sp, #80
; SOFT-NEXT: add r12, sp, #40
; SOFT-NEXT: vld1.64 {d16, d17}, [r12]
; SOFT-NEXT: add r12, sp, #48
; SOFT-NEXT: add r12, sp, #8
; SOFT-NEXT: vabs.f16 q8, q8
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
; SOFT-NEXT: add r12, sp, #64
; SOFT-NEXT: add r12, sp, #24
; SOFT-NEXT: vadd.f16 q8, q8, q9
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
; SOFT-NEXT: add r12, sp, #16
; SOFT-NEXT: vmul.f16 q8, q9, q8
; SOFT-NEXT: vst1.64 {d16, d17}, [r12]
; SOFT-NEXT: mov r12, sp
; SOFT-NEXT: vldr d16, [sp, #40]
; SOFT-NEXT: vst1.16 {d16}, [r12:64]!
; SOFT-NEXT: str r3, [r12]
; SOFT-NEXT: bl use
; SOFT-NEXT: add sp, sp, #32
; SOFT-NEXT: pop {r11, pc}
; SOFT-NEXT: vldr d16, [sp]
; SOFT-NEXT: vstr d16, [sp]
; SOFT-NEXT: str r3, [sp, #8]
; SOFT-NEXT: b use
;
; HARD-LABEL: many_args_test:
; HARD: @ %bb.0: @ %entry
Expand All @@ -177,33 +172,25 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
;
; SOFTEB-LABEL: many_args_test:
; SOFTEB: @ %bb.0: @ %entry
; SOFTEB-NEXT: .save {r11, lr}
; SOFTEB-NEXT: push {r11, lr}
; SOFTEB-NEXT: .pad #32
; SOFTEB-NEXT: sub sp, sp, #32
; SOFTEB-NEXT: add r12, sp, #80
; SOFTEB-NEXT: mov lr, sp
; SOFTEB-NEXT: add r12, sp, #40
; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12]
; SOFTEB-NEXT: add r12, sp, #48
; SOFTEB-NEXT: add r12, sp, #8
; SOFTEB-NEXT: vrev64.16 q8, q8
; SOFTEB-NEXT: vabs.f16 q8, q8
; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
; SOFTEB-NEXT: add r12, sp, #64
; SOFTEB-NEXT: add r12, sp, #24
; SOFTEB-NEXT: vrev64.16 q9, q9
; SOFTEB-NEXT: vadd.f16 q8, q8, q9
; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
; SOFTEB-NEXT: add r12, sp, #16
; SOFTEB-NEXT: vrev64.16 q9, q9
; SOFTEB-NEXT: vmul.f16 q8, q9, q8
; SOFTEB-NEXT: vldr d18, [sp, #40]
; SOFTEB-NEXT: vrev64.16 d18, d18
; SOFTEB-NEXT: vst1.16 {d18}, [lr:64]!
; SOFTEB-NEXT: str r3, [lr]
; SOFTEB-NEXT: vldr d18, [sp]
; SOFTEB-NEXT: vrev64.16 q8, q8
; SOFTEB-NEXT: vst1.64 {d16, d17}, [r12]
; SOFTEB-NEXT: bl use
; SOFTEB-NEXT: add sp, sp, #32
; SOFTEB-NEXT: pop {r11, pc}
; SOFTEB-NEXT: vstr d18, [sp]
; SOFTEB-NEXT: str r3, [sp, #8]
; SOFTEB-NEXT: b use
;
; HARDEB-LABEL: many_args_test:
; HARDEB: @ %bb.0: @ %entry
Expand Down
43 changes: 43 additions & 0 deletions llvm/test/CodeGen/ARM/musttail.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please generate checks with update_llc_test_checks.py


; The repro example from https://github.com/llvm/llvm-project/issues/57069#issuecomment-1212754850
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none)
define hidden noundef i32 @many_args_callee(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5) local_unnamed_addr #0 {
%7 = add nsw i32 %1, %0
%8 = add nsw i32 %7, %2
%9 = add nsw i32 %8, %3
%10 = add nsw i32 %9, %4
%11 = add nsw i32 %10, %5
ret i32 %11
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none)
define hidden noundef i32 @many_args(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5) local_unnamed_addr #1 {
; CHECK: b many_args_callee
%7 = musttail call noundef i32 @many_args_callee(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6)
ret i32 %7
}

; Test with sret
; Function Attrs: optsize
declare dso_local void @sret_callee(ptr dead_on_unwind writable sret({ double, double }) align 8, i16 noundef signext) local_unnamed_addr #1

; Function Attrs: mustprogress optsize
define dso_local void @sret_caller(ptr dead_on_unwind noalias writable sret({ double, double }) align 8 %agg.result, i16 noundef signext %P0) local_unnamed_addr #0 {
entry:
; CHECK: b sret_callee
musttail call void @sret_callee(ptr dead_on_unwind writable sret({ double, double }) align 8 %agg.result, i16 noundef signext 20391) #2
ret void
}

%struct.Large = type { [60 x i32] }

; Function Attrs: mustprogress noinline optnone
define dso_local void @large_caller(i64 noundef %0, i64 noundef %1, %struct.Large* noundef byval(%struct.Large) align 4 %2, %struct.Large* noundef byval(%struct.Large) align 4 %3) #0 {
entry:
; CHECK: b large_callee
musttail call void @large_callee(i64 noundef %0, i64 noundef %1, %struct.Large* noundef byval(%struct.Large) align 4 %2, %struct.Large* noundef byval(%struct.Large) align 4 %3)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add tests for both "%0, %1, %2, %3" and "%0, %1, %3, %2".

ret void
}

declare dso_local void @large_callee(i64 noundef, i64 noundef, %struct.Large* noundef byval(%struct.Large) align 4, %struct.Large* noundef byval(%struct.Large) align 4) #1
Loading